From c19c03fc749147f565e807fa65f1729066800571 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 4 Jun 2007 15:15:35 +1000 Subject: [POWERPC] unmap_vm_area becomes unmap_kernel_range for the public This makes unmap_vm_area static and a wrapper around a new exported unmap_kernel_range that takes an explicit range instead of a vm_area struct. This makes it more versatile for code that wants to play with kernel page tables outside of the standard vmalloc area. (One example is some rework of the PowerPC PCI IO space mapping code that depends on that patch and removes some code duplication and horrible abuse of forged struct vm_struct). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- include/linux/vmalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4b7ee83787c1..132b260aef1e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -65,9 +65,10 @@ extern struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node, gfp_t gfp_mask); extern struct vm_struct *remove_vm_area(void *addr); + extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); -extern void unmap_vm_area(struct vm_struct *area); +extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* * Internals. Dont't use.. -- cgit v1.2.3 From d7ad2254fa7cc11aec3faeba076c1243f6adeb47 Mon Sep 17 00:00:00 2001 From: John Keller Date: Mon, 9 Jul 2007 11:42:24 -0700 Subject: [IA64] SN: Correct ROM resource length for BIOS copy On SN systems, when setting the IORESOURCE_ROM_BIOS_COPY resource flag, the resource length should be set to the actual size of the ROM image so that a call to pci_map_rom() returns the correct size. Signed-off-by: John Keller Signed-off-by: Andrew Morton Signed-off-by: Tony Luck --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 086a0e5a6318..acb9387c0364 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -560,6 +560,7 @@ void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void __iomem __must_check *pci_map_rom_copy(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); void pci_remove_rom(struct pci_dev *pdev); +size_t pci_get_rom_size(void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); -- cgit v1.2.3 From 149983af609e8f5c57157467baf8545d17b8a6a1 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 26 Jun 2007 15:55:28 +0300 Subject: mlx4_core: Get the maximum message size from reported device capabilities Get the maximum message size from the device capabilities returned from the QUERY_DEV_CAP firmware command, rather than hard-coding 2 GB. Signed-off-by: Dotan Barak Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b372f5910fc1..8209387ee854 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -172,6 +172,7 @@ struct mlx4_caps { int num_pds; int reserved_pds; int mtt_entry_sz; + u32 max_msg_sz; u32 page_size_cap; u32 flags; u16 stat_rate_support; -- cgit v1.2.3 From 80128ff79d282cf71b1819dbca9b8dd47d8ed3e8 Mon Sep 17 00:00:00 2001 From: Vitaly Bordug Date: Mon, 9 Jul 2007 11:37:35 -0700 Subject: [POWERPC] 8xx: mpc885ads pcmcia support Adds support for PowerQuicc on-chip PCMCIA. The driver is implemented as of_device, so only arch/powerpc stuff is capable to use it, which now implies only mpc885ads reference board. To cope with the code that should be hooked inside driver, but is really board specific (like set_voltage), global structure mpc8xx_pcmcia_ops holds necessary function pointers that are filled in the BSP code. [akpm@linux-foundation.org: whitespace diddles] Signed-off-by: Vitaly Bordug Acked-by: Arnd Bergmann Acked-by: Olof Johansson Cc: Dominik Brodowski Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Kumar Gala --- include/linux/fsl_devices.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 73710d617775..12e631f0fb77 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -120,5 +120,10 @@ struct fsl_spi_platform_data { u32 sysclk; }; +struct mpc8xx_pcmcia_ops { + void(*hw_ctrl)(int slot, int enable); + int(*voltage_set)(int slot, int vcc, int vpp); +}; + #endif /* _FSL_DEVICE_H_ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 4c62b53454a83178676e5ecae6665447d363c7b4 Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Wed, 27 Jun 2007 16:02:14 +0530 Subject: configfs: misc cleanups 1. item.c:config_item_cleanup() is a private function (only called by config_item_release() in same file). However, it is spuriously exported in include/linux/configfs.h, so remove that export and make it static in item.c. Also, it is no longer exported / interface function, so no need to give comment for this function (the comment was stating obvious thing, anyway). 2. Kernel-doc comment format does not allow empty line between end of comment and start of function (declaration line). There were several such spurious empty lines in item.c, so fix them. fs/configfs/item.c | 15 +++------------ include/linux/configfs.h | 1 - 2 files changed, 3 insertions(+), 13 deletions(-) Signed-off-by: Satyam Sharma Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index fef6f3d0a4a7..3d4a96eb0e9b 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -75,7 +75,6 @@ extern void config_item_init(struct config_item *); extern void config_item_init_type_name(struct config_item *item, const char *name, struct config_item_type *type); -extern void config_item_cleanup(struct config_item *); extern struct config_item * config_item_get(struct config_item *); extern void config_item_put(struct config_item *); -- cgit v1.2.3 From 9b1d9aa4e9c5cafe73b9df21d758b50b5d75264d Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Wed, 4 Jul 2007 16:37:06 +0530 Subject: [PATCH] configfs+dlm: Separate out __CONFIGFS_ATTR into configfs.h fs/dlm/config.c contains a useful generic macro called __CONFIGFS_ATTR that is similar to sysfs' __ATTR macro that makes defining attributes easy for any user of configfs. Separate it out into configfs.h so that other users (forthcoming in dynamic netconsole patchset) can use it too. Signed-off-by: Satyam Sharma Cc: David Teigland Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 3d4a96eb0e9b..def7c83d43a2 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -130,6 +130,22 @@ struct configfs_attribute { mode_t ca_mode; }; +/* + * Users often need to create attribute structures for their configurable + * attributes, containing a configfs_attribute member and function pointers + * for the show() and store() operations on that attribute. They can use + * this macro (similar to sysfs' __ATTR) to make defining attributes easier. + */ +#define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ +{ \ + .attr = { \ + .ca_name = __stringify(_name), \ + .ca_mode = _mode, \ + .ca_owner = THIS_MODULE, \ + }, \ + .show = _show, \ + .store = _store, \ +} /* * If allow_link() exists, the item can symlink(2) out to other -- cgit v1.2.3 From 3fe6c5ce1176cf661dbe71fc43b627c1a742a89a Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Wed, 4 Jul 2007 16:37:16 +0530 Subject: [PATCH] configfs+dlm: Rename config_group_find_obj and state semantics clearly Configfs being based upon sysfs code, config_group_find_obj() is probably so named because of the similar kset_find_obj() in sysfs. However, "kobject"s in sysfs become "config_item"s in configfs, so let's call it config_group_find_item() instead, for sake of uniformity, and make corresponding change in the users of this function. BTW a crucial difference between kset_find_obj and config_group_find_item is in locking expectations. kset_find_obj does its locking by itself, but config_group_find_item expects the *caller* to do the locking. The reason for this: kset's have their own locks, config_group's don't but instead rely on the subsystem mutex. And, subsystem needn't necessarily be around when config_group_find_item() is called. So let's state these locking semantics explicitly, and rectify the comment, otherwise bugs could continue to occur in future, as they did in the past (refer commit d82b8191e238 in gfs2-2.6-fixes.git). [ I also took the opportunity to fix some bad whitespace and double-empty lines. --Joel ] [ Conflict in fs/dlm/config.c with commit 3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ] Signed-off-by: Satyam Sharma Cc: David Teigland Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index def7c83d43a2..bbb1b6cafa8b 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -86,12 +86,10 @@ struct config_item_type { struct configfs_attribute **ct_attrs; }; - /** * group - a group of config_items of a specific type, belonging * to a specific subsystem. */ - struct config_group { struct config_item cg_item; struct list_head cg_children; @@ -99,13 +97,11 @@ struct config_group { struct config_group **default_groups; }; - extern void config_group_init(struct config_group *group); extern void config_group_init_type_name(struct config_group *group, const char *name, struct config_item_type *type); - static inline struct config_group *to_config_group(struct config_item *item) { return item ? container_of(item,struct config_group,cg_item) : NULL; @@ -121,7 +117,8 @@ static inline void config_group_put(struct config_group *group) config_item_put(&group->cg_item); } -extern struct config_item *config_group_find_obj(struct config_group *, const char *); +extern struct config_item *config_group_find_item(struct config_group *, + const char *); struct configfs_attribute { -- cgit v1.2.3 From e6bd07aee739566803425acdbf5cdb29919164e1 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 6 Jul 2007 23:33:17 -0700 Subject: configfs: Convert subsystem semaphore to mutex Convert the su_sem member of struct configfs_subsystem to a struct mutex, as that's what it is. Also convert all the users and update Documentation/configfs.txt and Documentation/configfs_example.c accordingly. [ Conflict in fs/dlm/config.c with commit 3168b0780d06ace875696f8a648d04d6089654e5 manually resolved. --Mark ] Inspired-by: Satyam Sharma Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index bbb1b6cafa8b..5ce0fc4e3b5b 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -40,9 +40,9 @@ #include #include #include +#include #include -#include #define CONFIGFS_ITEM_NAME_LEN 20 @@ -174,7 +174,7 @@ struct configfs_group_operations { struct configfs_subsystem { struct config_group su_group; - struct semaphore su_sem; + struct mutex su_mutex; }; static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group) -- cgit v1.2.3 From 299894cc9001b09e3e9685f2709b49e7e1092ccc Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 6 Oct 2006 17:33:23 -0700 Subject: configfs: accessing item hierarchy during rmdir(2) Add a notification callback, ops->disconnect_notify(). It has the same prototype as ->drop_item(), but it will be called just before the item linkage is broken. This way, configfs users who want to do work while the object is still in the heirarchy have a chance. Client drivers will still need to config_item_put() in their ->drop_item(), if they implement it. They need do nothing in ->disconnect_notify(). They don't have to provide it if they don't care. But someone who wants to be notified before ci_parent is set to NULL can now be notified. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 5ce0fc4e3b5b..8227e730dac7 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -169,6 +169,7 @@ struct configfs_group_operations { struct config_item *(*make_item)(struct config_group *group, const char *name); struct config_group *(*make_group)(struct config_group *group, const char *name); int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; -- cgit v1.2.3 From 631d1febab8e546e3bb800bdfe2c212b8adf87de Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Mon, 18 Jun 2007 18:06:09 -0700 Subject: configfs: config item dependancies. Sometimes other drivers depend on particular configfs items. For example, ocfs2 mounts depend on a heartbeat region item. If that region item is removed with rmdir(2), the ocfs2 mount must BUG or go readonly. Not happy. This provides two additional API calls: configfs_depend_item() and configfs_undepend_item(). A client driver can call configfs_depend_item() on an existing item to tell configfs that it is depended on. configfs will then return -EBUSY from rmdir(2) for that item. When the item is no longer depended on, the client driver calls configfs_undepend_item() on it. These API cannot be called underneath any configfs callbacks, as they will conflict. They can block and allocate. A client driver probably shouldn't calling them of its own gumption. Rather it should be providing an API that external subsystems call. How does this work? Imagine the ocfs2 mount process. When it mounts, it asks for a heart region item. This is done via a call into the heartbeat code. Inside the heartbeat code, the region item is looked up. Here, the heartbeat code calls configfs_depend_item(). If it succeeds, then heartbeat knows the region is safe to give to ocfs2. If it fails, it was being torn down anyway, and heartbeat can gracefully pass up an error. [ Fixed some bad whitespace in configfs.txt. --Mark ] Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- include/linux/configfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 8227e730dac7..8c6967f3fb11 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -188,6 +188,11 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +/* These functions can sleep and can alloc with GFP_KERNEL */ +/* WARNING: These cannot be called underneath configfs callbacks!! */ +int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); +void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target); + #endif /* __KERNEL__ */ #endif /* _CONFIGFS_H_ */ -- cgit v1.2.3 From 45a66c1c3ff88e8050dd25e81bafdf79a12a8042 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 9 Jul 2007 11:46:13 -0700 Subject: libata-core: convert to use cancel_rearming_delayed_work() We should not use cancel_work_sync(delayed_work->work). This works, but not good. We can use cancel_rearming_delayed_work(), this also simplifies the code. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index a3df64677ac3..bf98d44c8109 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -196,7 +196,6 @@ enum { ATA_PFLAG_SCSI_HOTPLUG = (1 << 6), /* SCSI hotplug scheduled */ ATA_PFLAG_INITIALIZING = (1 << 7), /* being initialized, don't touch */ - ATA_PFLAG_FLUSH_PORT_TASK = (1 << 16), /* flush port task */ ATA_PFLAG_SUSPENDED = (1 << 17), /* port is suspended (power) */ ATA_PFLAG_PM_PENDING = (1 << 18), /* PM operation pending */ ATA_PFLAG_GTM_VALID = (1 << 19), /* acpi_gtm data valid */ -- cgit v1.2.3 From d583bc18812f8da52bf25eef9cd111e5fd46a6ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Jul 2007 18:02:07 +0900 Subject: libata: simplify PCI legacy SFF host handling With PCI resource fix up for legacy hosts. We can use the same code path to allocate IO resources and initialize host for both legacy and native SFF hosts. Only IRQ requesting needs to be different. Rename ata_pci_*_native_host() to ata_pci_*_sff_host(), kill all legacy specific functions and use the renamed functions instead. This simplifies code a lot. Signed-off-by: Tejun Heo Cc: Alan Cox Signed-off-by: Jeff Garzik --- include/linux/libata.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index bf98d44c8109..0c8b6578bd59 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -870,11 +870,11 @@ struct pci_bits { unsigned long val; }; -extern int ata_pci_init_native_host(struct ata_host *host); +extern int ata_pci_init_sff_host(struct ata_host *host); extern int ata_pci_init_bmdma(struct ata_host *host); -extern int ata_pci_prepare_native_host(struct pci_dev *pdev, - const struct ata_port_info * const * ppi, - struct ata_host **r_host); +extern int ata_pci_prepare_sff_host(struct pci_dev *pdev, + const struct ata_port_info * const * ppi, + struct ata_host **r_host); extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); extern unsigned long ata_pci_default_filter(struct ata_device *, unsigned long); #endif /* CONFIG_PCI */ -- cgit v1.2.3 From 75683fe7153c3817bb4fd4491e2a5913af6c463e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Jul 2007 13:31:27 +0900 Subject: libata: clean up horkage handling Horkage handling had the following problems. * dev->horkage was positioned after ATA_DEVICE_CLEAR_OFFSET, so it was cleared before the device is configured. This broke HORKAGE_DIAGNOSTIC. * Some used dev->horkage while others called ata_device_blacklisted() directly. This was at best confusing. This patch moves dev->horkage right after dev->flags and set the field according to the blacklist during device configuration. All users test against dev->horkage. ata_device_blacklisted() now has only one user, make it static. While at it, rename it to ata_dev_blacklisted() for consistency. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 0c8b6578bd59..47cd2a1c5544 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -434,6 +434,7 @@ struct ata_device { struct ata_port *ap; unsigned int devno; /* 0 or 1 */ unsigned long flags; /* ATA_DFLAG_xxx */ + unsigned int horkage; /* List of broken features */ struct scsi_device *sdev; /* attached SCSI device */ #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; @@ -465,7 +466,6 @@ struct ata_device { /* error history */ struct ata_ering ering; int spdn_cnt; - unsigned int horkage; /* List of broken features */ }; /* Offset into struct ata_device. Fields above it are maintained @@ -793,7 +793,6 @@ extern void ata_id_string(const u16 *id, unsigned char *s, extern void ata_id_c_string(const u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_id_to_dma_mode(struct ata_device *dev, u8 unknown); -extern unsigned long ata_device_blacklisted(const struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); -- cgit v1.2.3 From 814600ee10d3c056ada315cdbdc2ebe48f54c75a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Jul 2007 19:05:58 +0900 Subject: libata-link: add PMP related ATA constants Add Port Multiplier related ATA constants and macros. Some of these will be used by ata_link implementation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/ata.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index 407dc7e098bc..b5a20162af32 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -164,6 +164,8 @@ enum { ATA_CMD_SET_MAX = 0xF9, ATA_CMD_SET_MAX_EXT = 0x37, ATA_CMD_READ_LOG_EXT = 0x2f, + ATA_CMD_PMP_READ = 0xE4, + ATA_CMD_PMP_WRITE = 0xE8, /* READ_LOG_EXT pages */ ATA_LOG_SATA_NCQ = 0x10, @@ -212,6 +214,28 @@ enum { 0=to device, 1=to host */ ATAPI_CDB_LEN = 16, + /* PMP stuff */ + SATA_PMP_MAX_PORTS = 15, + SATA_PMP_CTRL_PORT = 15, + + SATA_PMP_GSCR_DWORDS = 128, + SATA_PMP_GSCR_PROD_ID = 0, + SATA_PMP_GSCR_REV = 1, + SATA_PMP_GSCR_PORT_INFO = 2, + SATA_PMP_GSCR_ERROR = 32, + SATA_PMP_GSCR_ERROR_EN = 33, + SATA_PMP_GSCR_FEAT = 64, + SATA_PMP_GSCR_FEAT_EN = 96, + + SATA_PMP_PSCR_STATUS = 0, + SATA_PMP_PSCR_ERROR = 1, + SATA_PMP_PSCR_CONTROL = 2, + + SATA_PMP_FEAT_BIST = (1 << 0), + SATA_PMP_FEAT_PMREQ = (1 << 1), + SATA_PMP_FEAT_DYNSSC = (1 << 2), + SATA_PMP_FEAT_NOTIFY = (1 << 3), + /* cable types */ ATA_CBL_NONE = 0, ATA_CBL_PATA40 = 1, @@ -418,4 +442,9 @@ static inline int lba_48_ok(u64 block, u32 n_block) return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= 65536); } +#define sata_pmp_gscr_vendor(gscr) ((gscr)[SATA_PMP_GSCR_PROD_ID] & 0xffff) +#define sata_pmp_gscr_devid(gscr) ((gscr)[SATA_PMP_GSCR_PROD_ID] >> 16) +#define sata_pmp_gscr_rev(gscr) (((gscr)[SATA_PMP_GSCR_REV] >> 8) & 0xff) +#define sata_pmp_gscr_ports(gscr) ((gscr)[SATA_PMP_GSCR_PORT_INFO] & 0xf) + #endif /* __LINUX_ATA_H__ */ -- cgit v1.2.3 From 88be9f990fe70f0f177ef44a16a477599e91f825 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2007 10:42:27 -0400 Subject: NFS: Replace vfsmount and dentry in nfs_open_context with struct path Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0543439a97af..07eea8f64ecf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -70,8 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { atomic_t count; - struct vfsmount *vfsmnt; - struct dentry *dentry; + struct path path; struct rpc_cred *cred; struct nfs4_state *state; fl_owner_t lockowner; -- cgit v1.2.3 From aa53ed541a1fec78a78d02afc8b042d040cc080d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 5 Jun 2007 14:49:03 -0400 Subject: NFS4: on a O_EXCL OPEN make sure SETATTR sets the fields holding the verifier The Linux NFS4 client simply skips over the bitmask in an O_EXCL open call and so it doesn't bother to reset any fields that may be holding the verifier. This patch has us save the first two words of the bitmask (which is all the current client has #defines for). The client then later checks this bitmask and turns on the appropriate flags in the sattr->ia_verify field for the following SETATTR call. This patch only currently checks to see if the server used the atime and mtime slots for the verifier (which is what the Linux server uses for this). I'm not sure of what other fields the server could reasonably use, but adding checks for others should be trivial. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 7e7f33a38fc0..8726491de154 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -15,6 +15,7 @@ #include +#define NFS4_BITMAP_SIZE 2 #define NFS4_VERIFIER_SIZE 8 #define NFS4_STATEID_SIZE 16 #define NFS4_FHSIZE 128 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 10c26ed0db71..f7100df3a690 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -144,6 +144,7 @@ struct nfs_openres { nfs4_stateid delegation; __u32 do_recall; __u64 maxsize; + __u32 attrset[NFS4_BITMAP_SIZE]; }; /* -- cgit v1.2.3 From c03b40246123b2ced79e2620d1d2c089bb12369a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 13:26:38 -0400 Subject: NFS: Convert struct nfs_page to use krefs Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index bd193af80162..c780e7e39f99 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -16,7 +16,7 @@ #include #include -#include +#include /* * Valid flags for the radix tree @@ -42,7 +42,7 @@ struct nfs_page { unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ - atomic_t wb_count; /* reference count */ + struct kref wb_kref; /* reference count */ unsigned long wb_flags; struct nfs_writeverf wb_verf; /* Commit cookie */ }; @@ -89,7 +89,7 @@ extern void nfs_clear_page_writeback(struct nfs_page *req); /* - * Lock the page of an asynchronous request without incrementing the wb_count + * Lock the page of an asynchronous request without getting a new reference */ static inline int nfs_lock_request_dontget(struct nfs_page *req) @@ -98,14 +98,14 @@ nfs_lock_request_dontget(struct nfs_page *req) } /* - * Lock the page of an asynchronous request + * Lock the page of an asynchronous request and take a reference */ static inline int nfs_lock_request(struct nfs_page *req) { if (test_and_set_bit(PG_BUSY, &req->wb_flags)) return 0; - atomic_inc(&req->wb_count); + kref_get(&req->wb_kref); return 1; } -- cgit v1.2.3 From 9fd367f0f376ccfb2592eed9be0eece70429894f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:10:24 -0400 Subject: NFS cleanup: Rename NFS_PAGE_TAG_WRITEBACK to NFS_PAGE_TAG_LOCKED Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index c780e7e39f99..042434c39b7e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -21,7 +21,7 @@ /* * Valid flags for the radix tree */ -#define NFS_PAGE_TAG_WRITEBACK 0 +#define NFS_PAGE_TAG_LOCKED 0 /* * Valid flags for a dirty buffer @@ -84,8 +84,7 @@ extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); -extern int nfs_set_page_writeback_locked(struct nfs_page *req); -extern void nfs_clear_page_writeback(struct nfs_page *req); +extern void nfs_clear_page_tag_locked(struct nfs_page *req); /* -- cgit v1.2.3 From 5c36968343fcd013a3f7ae93f246c2e75596780b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:27:42 -0400 Subject: NFS cleanup: speed up nfs_scan_commit using radix tree tags Add a tag for requests that are waiting for a COMMIT Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 042434c39b7e..481a42105d69 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -22,6 +22,7 @@ * Valid flags for the radix tree */ #define NFS_PAGE_TAG_LOCKED 0 +#define NFS_PAGE_TAG_COMMIT 1 /* * Valid flags for a dirty buffer @@ -71,8 +72,8 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, - pgoff_t idx_start, unsigned int npages); +extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, + pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), -- cgit v1.2.3 From 2aefa104313996d1a9582476cee53d1296c834bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:40:59 -0400 Subject: NFS: Remove the redundant 'dirty' and 'commit' lists from nfs_inode Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +---- include/linux/nfs_page.h | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 07eea8f64ecf..a94205476736 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -156,12 +156,9 @@ struct nfs_inode { * This is the list of dirty unwritten pages. */ spinlock_t req_lock; - struct list_head dirty; - struct list_head commit; struct radix_tree_root nfs_page_tree; - unsigned int ndirty, - ncommit, + unsigned int ncommit, npages; /* Open contexts for shared mmap writes */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 481a42105d69..78e60798d10e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -34,8 +34,7 @@ struct nfs_inode; struct nfs_page { - struct list_head wb_list, /* Defines state of page: */ - *wb_list_head; /* read/write/commit */ + struct list_head wb_list; /* Defines state of page: */ struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ @@ -118,7 +117,6 @@ static inline void nfs_list_add_request(struct nfs_page *req, struct list_head *head) { list_add_tail(&req->wb_list, head); - req->wb_list_head = head; } @@ -132,7 +130,6 @@ nfs_list_remove_request(struct nfs_page *req) if (list_empty(&req->wb_list)) return; list_del_init(&req->wb_list); - req->wb_list_head = NULL; } static inline struct nfs_page * -- cgit v1.2.3 From dce34ce298d85b81630401f4feb4bd7ac77fe9c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 15:47:53 -0400 Subject: NFS: Prevent integer overflow in nfs_scan_list() Also ensure that nfs_inode ncommit and npages are large enough to represent all possible values for the number of pages. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a94205476736..750708ccd708 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -158,7 +158,7 @@ struct nfs_inode { spinlock_t req_lock; struct radix_tree_root nfs_page_tree; - unsigned int ncommit, + unsigned long ncommit, npages; /* Open contexts for shared mmap writes */ -- cgit v1.2.3 From 3bec63db55463365110d00721ed60a31e4614cb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 17 Jun 2007 16:02:44 -0400 Subject: NFS: Convert struct nfs_open_context to use a kref Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 750708ccd708..bf24151d63be 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -30,6 +30,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -70,7 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { - atomic_t count; + struct kref kref; struct path path; struct rpc_cred *cred; struct nfs4_state *state; -- cgit v1.2.3 From 6529eba08fe7297852391a468d95322913de73fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:14 -0400 Subject: SUNRPC: Move rpc_task->tk_task list into struct rpc_clnt Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++++ include/linux/sunrpc/sched.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 66611423c8ee..0801ab5407ce 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -26,6 +26,8 @@ struct rpc_inode; struct rpc_clnt { atomic_t cl_count; /* Number of clones */ atomic_t cl_users; /* number of references */ + struct list_head cl_clients; /* Global list of clients */ + struct list_head cl_tasks; /* List of tasks */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ @@ -122,6 +124,8 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); +void rpc_register_client(struct rpc_clnt *); +void rpc_unregister_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2047fb202a13..3387b008cdfc 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -110,11 +110,6 @@ struct rpc_task { if (!list_empty(head) && \ ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) -/* .. and walking list of all tasks */ -#define alltask_for_each(task, pos, head) \ - list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_task)),1) - typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { -- cgit v1.2.3 From 4bef61ff7514396419563ca54fd42ef846485b06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Jun 2007 14:17:01 -0400 Subject: SUNRPC: Add a per-rpc_clnt spinlock Use that to protect the rpc_clnt->cl_tasks list instead of using a global lock. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 0801ab5407ce..2f4b520a7419 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -28,6 +28,7 @@ struct rpc_clnt { atomic_t cl_users; /* number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + spinlock_t cl_lock; /* spinlock */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ -- cgit v1.2.3 From 34f52e3591f241b825353ba27def956d8487c400 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:31 -0400 Subject: SUNRPC: Convert rpc_clnt->cl_users to a kref Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2f4b520a7419..003d8ea70c19 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -24,8 +24,8 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { + struct kref cl_kref; /* Number of references */ atomic_t cl_count; /* Number of clones */ - atomic_t cl_users; /* number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ -- cgit v1.2.3 From 848f1fe6be2e290691bb6c13cbb8fd92bd0cfaab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 19:39:12 -0400 Subject: SUNRPC: Kill rpc_clnt->cl_dead Its use is at best racy, and there is only one user (lockd), which has additional locking that makes the whole thing redundant. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 003d8ea70c19..ab3ef6d629a7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -45,8 +45,7 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ cl_autobind : 1,/* use getport() */ - cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_oneshot : 1;/* dispose after use */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ -- cgit v1.2.3 From 90c5755ff5111ffdcca10a1e8a823dba29f37b6d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 19:49:36 -0400 Subject: SUNRPC: Kill rpc_clnt->cl_oneshot Replace it with explicit calls to rpc_shutdown_client() or rpc_destroy_client() (for the case of asynchronous calls). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3ef6d629a7..fe7ea65ed0ae 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -44,8 +44,7 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1,/* use getport() */ - cl_oneshot : 1;/* dispose after use */ + cl_autobind : 1;/* use getport() */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -112,10 +111,9 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_INTR (1UL << 1) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) -#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) -#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) -#define RPC_CLNT_CREATE_NOPING (1UL << 5) -#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6) +#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) +#define RPC_CLNT_CREATE_NOPING (1UL << 4) +#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, -- cgit v1.2.3 From 4c402b40970382ded616eadd544fd63feb76cc79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 16:40:32 -0400 Subject: SUNRPC: Remove rpc_clnt->cl_count The kref now does most of what cl_count + cl_user used to do. The only remaining role for cl_count is to tell us if we are in a 'shutdown' phase. We can provide that information using a single bit field instead of a full atomic counter. Also rename rpc_destroy_client() to rpc_close_client(), which reflects better what its role is these days. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index fe7ea65ed0ae..cf03494c36e7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -25,7 +25,6 @@ struct rpc_inode; */ struct rpc_clnt { struct kref cl_kref; /* Number of references */ - atomic_t cl_count; /* Number of clones */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ @@ -119,8 +118,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); -int rpc_shutdown_client(struct rpc_clnt *); -int rpc_destroy_client(struct rpc_clnt *); +void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_register_client(struct rpc_clnt *); void rpc_unregister_client(struct rpc_clnt *); -- cgit v1.2.3 From f61534dfd38f895b203e2aadaba04f21a992ca8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2007 17:31:58 -0400 Subject: SUNRPC: Remove redundant calls to rpciod_up()/rpciod_down() Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 52b4378311c8..144d955dc46a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,7 +16,6 @@ struct nfs_client { #define NFS_CS_INITING 1 /* busy initialising */ int cl_nfsversion; /* NFS protocol version */ unsigned long cl_res_state; /* NFS resources state */ -#define NFS_CS_RPCIOD 0 /* - rpciod started */ #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ -- cgit v1.2.3 From 188fef11db219f13f32d055ba59985e7d1a349fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Jun 2007 14:18:40 -0400 Subject: SUNRPC: Move rpc_register_client and friends into net/sunrpc/clnt.c Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index cf03494c36e7..a451351c7eff 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -120,8 +120,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -void rpc_register_client(struct rpc_clnt *); -void rpc_unregister_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); -- cgit v1.2.3 From 4a8c1344dccb848dbcf0edabc8b5c51a8ecf2808 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 10:14:14 -0400 Subject: SUNRPC: Add a backpointer from the struct rpc_cred to the rpc_auth Cleans up an issue whereby rpcsec_gss uses the rpc_clnt->cl_auth. If we want to be able to add several rpc_auths to a single rpc_clnt, then this abuse must go. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 534cdc7be58d..8ef27afeea73 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -30,8 +30,11 @@ struct auth_cred { /* * Client user credentials */ +struct rpc_auth; +struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ + struct rpc_auth * cr_auth; struct rpc_credops * cr_ops; unsigned long cr_expire; /* when to gc */ atomic_t cr_count; /* ref count */ @@ -60,6 +63,7 @@ struct rpc_cred_cache { unsigned long expire; /* cache expiry interval */ }; +struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ /* guess at number of u32's auth adds before -- cgit v1.2.3 From 6e84c7b66a0aa0be16a7728d1e687c57978dac2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 7 Jun 2007 15:31:36 -0400 Subject: SUNRPC: Add a downcall queue to struct rpc_inode Currently, the downcall queue is tied to the struct gss_auth, which means that different RPCSEC_GSS pseudoflavours must use different upcall pipes. Add a list to struct rpc_inode that can be used instead. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index ad293760f6eb..430cea104817 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -23,6 +23,7 @@ struct rpc_inode { void *private; struct list_head pipe; struct list_head in_upcall; + struct list_head in_downcall; int pipelen; int nreaders; int nwriters; -- cgit v1.2.3 From 03a1256f06cf1f58e33971fb4a524479e75c200e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Jun 2007 14:14:53 -0400 Subject: SUNRPC: Add a field to track the number of kernel users of an rpc_pipe This allows us to correctly deduce when we need to remove the pipe. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 430cea104817..51b977a4ca20 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -27,6 +27,7 @@ struct rpc_inode { int pipelen; int nreaders; int nwriters; + int nkern_readwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; -- cgit v1.2.3 From 3ab9bb7243489f9db3abf3d05521ddfc6b184c0a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 9 Jun 2007 15:41:42 -0400 Subject: SUNRPC: Fix a memory leak in the auth credcache code The leak only affects the RPCSEC_GSS caches, since they are the only ones that are dynamically allocated... Rename the existing rpcauth_free_credcache() to rpcauth_clear_credcache() in order to better describe its role, then add a new function rpcauth_destroy_credcache() that actually frees the cache in addition to clearing it out. Also move the call to destroy the credcache in gss_destroy() to come before the rpc upcall pipe is unlinked. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8ef27afeea73..3972b8414c88 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -143,7 +143,8 @@ int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); int rpcauth_init_credcache(struct rpc_auth *, unsigned long); -void rpcauth_free_credcache(struct rpc_auth *); +void rpcauth_destroy_credcache(struct rpc_auth *); +void rpcauth_clear_credcache(struct rpc_cred_cache *); static inline struct rpc_cred * get_rpccred(struct rpc_cred *cred) -- cgit v1.2.3 From 64c91a1f1c8bc4295fd6b90df8adf911a7dd64f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:17:16 -0400 Subject: SUNRPC: Make rpc_ping() static Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a451351c7eff..a0e51e193284 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -136,7 +136,6 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); -int rpc_ping(struct rpc_clnt *clnt, int flags); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -- cgit v1.2.3 From 5e1550d6a2c2dd33ff0ca5febefd8e9c65c6ca1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:17:16 -0400 Subject: SUNRPC: Add the helper function 'rpc_call_null()' Does a NULL RPC call and returns a pointer to the resulting rpc_task. The call may be either synchronous or asynchronous. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a0e51e193284..097984b03857 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,8 @@ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); +struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, + int flags); void rpc_restart_call(struct rpc_task *); void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); -- cgit v1.2.3 From de7a8ce38aea529876db3890b61947bc4bc004da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 10:46:47 -0400 Subject: SUNRPC: Rename rpcauth_destroy() to rpcauth_release() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3972b8414c88..bc77c730325c 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -128,7 +128,7 @@ extern struct rpc_authops authdes_ops; int rpcauth_register(struct rpc_authops *); int rpcauth_unregister(struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); -void rpcauth_destroy(struct rpc_auth *); +void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); -- cgit v1.2.3 From f1c0a8615090359d57e096157feb9f900cbb233c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 20:17:58 -0400 Subject: SUNRPC: Mark auth and cred operation tables as constant. Also do the same for gss_api operation tables. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 15 ++++++--------- include/linux/sunrpc/gss_api.h | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index bc77c730325c..e606c2804685 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -35,7 +35,7 @@ struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct rpc_auth * cr_auth; - struct rpc_credops * cr_ops; + const struct rpc_credops *cr_ops; unsigned long cr_expire; /* when to gc */ atomic_t cr_count; /* ref count */ unsigned short cr_flags; /* various flags */ @@ -73,7 +73,7 @@ struct rpc_auth { unsigned int au_verfsize; unsigned int au_flags; /* various flags */ - struct rpc_authops * au_ops; /* operations */ + const struct rpc_authops *au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss @@ -119,14 +119,11 @@ struct rpc_credops { void *, __be32 *, void *); }; -extern struct rpc_authops authunix_ops; -extern struct rpc_authops authnull_ops; -#ifdef CONFIG_SUNRPC_SECURE -extern struct rpc_authops authdes_ops; -#endif +extern const struct rpc_authops authunix_ops; +extern const struct rpc_authops authnull_ops; -int rpcauth_register(struct rpc_authops *); -int rpcauth_unregister(struct rpc_authops *); +int rpcauth_register(const struct rpc_authops *); +int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 5eca9e442051..bbac101ac372 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -77,7 +77,7 @@ struct gss_api_mech { struct module *gm_owner; struct xdr_netobj gm_oid; char *gm_name; - struct gss_api_ops *gm_ops; + const struct gss_api_ops *gm_ops; /* pseudoflavors supported by this mechanism: */ int gm_pf_num; struct pf_desc * gm_pfs; -- cgit v1.2.3 From 5fe4755e2526a2aa82b7ed8daeb3aed74a236925 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 19:55:31 -0400 Subject: SUNRPC: Clean up rpc credential initialisation Add a helper rpc_cred_init() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/auth_gss.h | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e606c2804685..d5bfc67461fc 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -127,6 +127,7 @@ int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); struct rpc_cred * rpcauth_bindcred(struct rpc_task *); void rpcauth_holdcred(struct rpc_task *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 2db2fbf34947..0bd1d06777b9 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -85,11 +85,6 @@ struct gss_cred { struct gss_upcall_msg *gc_upcall; }; -#define gc_uid gc_base.cr_uid -#define gc_count gc_base.cr_count -#define gc_flags gc_base.cr_flags -#define gc_expire gc_base.cr_expire - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ -- cgit v1.2.3 From fc432dd90760a629c57026e57f65ff80a1a31d2f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jun 2007 10:15:15 -0400 Subject: SUNRPC: Enforce atomic updates of rpc_cred->cr_flags Convert to the use of atomic bitops... Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index d5bfc67461fc..8586503d5ebd 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -36,19 +36,19 @@ struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; - unsigned long cr_expire; /* when to gc */ - atomic_t cr_count; /* ref count */ - unsigned short cr_flags; /* various flags */ #ifdef RPC_DEBUG unsigned long cr_magic; /* 0x0f4aa4f0 */ #endif + unsigned long cr_expire; /* when to gc */ + unsigned long cr_flags; /* various flags */ + atomic_t cr_count; /* ref count */ uid_t cr_uid; /* per-flavor data */ }; -#define RPCAUTH_CRED_NEW 0x0001 -#define RPCAUTH_CRED_UPTODATE 0x0002 +#define RPCAUTH_CRED_NEW 0 +#define RPCAUTH_CRED_UPTODATE 1 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 -- cgit v1.2.3 From e092bdcd939416ef911090890096fe07d0281a5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 23 Jun 2007 19:45:36 -0400 Subject: SUNRPC: cleanup rpc credential cache garbage collection Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 8586503d5ebd..4e78f0c5f014 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -34,6 +34,7 @@ struct rpc_auth; struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ + struct list_head cr_lru; /* lru garbage collection */ struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG -- cgit v1.2.3 From 31be5bf15f3dafffce110eb1afadccbf2e3067b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Jun 2007 15:55:26 -0400 Subject: SUNRPC: Convert the credcache lookup code to use RCU Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4e78f0c5f014..5974e8a493c4 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -16,6 +16,7 @@ #include #include +#include /* size of the nodename buffer */ #define UNX_MAXNODENAME 32 @@ -35,6 +36,7 @@ struct rpc_credops; struct rpc_cred { struct hlist_node cr_hash; /* hash chain */ struct list_head cr_lru; /* lru garbage collection */ + struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; #ifdef RPC_DEBUG @@ -50,6 +52,7 @@ struct rpc_cred { }; #define RPCAUTH_CRED_NEW 0 #define RPCAUTH_CRED_UPTODATE 1 +#define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 -- cgit v1.2.3 From 9499b4341b56935f61af9e7e354e7d11e70f5258 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Jun 2007 15:57:57 -0400 Subject: SUNRPC: Give credential cache a local spinlock Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5974e8a493c4..e5a3b5141ed2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -63,6 +63,7 @@ struct rpc_cred { #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; + spinlock_t lock; unsigned long nextgc; /* next garbage collection */ unsigned long expire; /* cache expiry interval */ }; @@ -126,6 +127,8 @@ struct rpc_credops { extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +void __init rpc_init_authunix(void); + int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); -- cgit v1.2.3 From f5c2187cfef628784d8a09b6d0f77888246d0c0f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Jun 2007 17:11:20 -0400 Subject: SUNRPC: Convert the credential garbage collector into a shrinker callback Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e5a3b5141ed2..7a69ca3bebaf 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -64,8 +64,6 @@ struct rpc_cred { struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; spinlock_t lock; - unsigned long nextgc; /* next garbage collection */ - unsigned long expire; /* cache expiry interval */ }; struct rpc_authops; @@ -128,6 +126,8 @@ extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; void __init rpc_init_authunix(void); +void __init rpcauth_init_module(void); +void __exit rpcauth_remove_module(void); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); @@ -147,7 +147,7 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -int rpcauth_init_credcache(struct rpc_auth *, unsigned long); +int rpcauth_init_credcache(struct rpc_auth *); void rpcauth_destroy_credcache(struct rpc_auth *); void rpcauth_clear_credcache(struct rpc_cred_cache *); -- cgit v1.2.3 From 5d28dc82074f1e64b22c9424b161abc1f5d6bcdb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 Jun 2007 19:18:38 -0400 Subject: SUNRPC: Convert gss_ctx_lock to an RCU lock Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth_gss.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 0bd1d06777b9..67658e17a375 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -75,6 +75,7 @@ struct gss_cl_ctx { struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; + struct rcu_head gc_rcu; }; struct gss_upcall_msg; -- cgit v1.2.3 From 1be27f36601973815171db684c711d30557cf50c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Jun 2007 14:29:04 -0400 Subject: SUNRPC: Remove the tk_auth macro... We should almost always be deferencing the rpc_auth struct by means of the credential's cr_auth field instead of the rpc_clnt->cl_auth anyway. Fix up that historical mistake, and remove the macro that propagated it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3387b008cdfc..8ea077db0099 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -98,7 +98,6 @@ struct rpc_task { unsigned short tk_pid; /* debugging aid */ #endif }; -#define tk_auth tk_client->cl_auth #define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ -- cgit v1.2.3 From 587142f85f796cf0b823dd3080e815f02ff6b952 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 09:57:54 -0400 Subject: NFS: Replace NFS_I(inode)->req_lock with inode->i_lock There is no justification for keeping a special spinlock for the exclusive use of the NFS writeback code. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bf24151d63be..cf395351cdd4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -156,7 +156,6 @@ struct nfs_inode { /* * This is the list of dirty unwritten pages. */ - spinlock_t req_lock; struct radix_tree_root nfs_page_tree; unsigned long ncommit, -- cgit v1.2.3 From 7af654f8d1b7460415af5d1d326233478dd0f563 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 12:49:23 -0400 Subject: NFSv4: Don't reuse expired nfs4_state_owner structs That just confuses certain NFSv4 servers. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 144d955dc46a..2cef0a68aa77 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -44,8 +44,6 @@ struct nfs_client { struct list_head cl_delegations; struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; spinlock_t cl_lock; unsigned long cl_lease_time; -- cgit v1.2.3 From 9f958ab8858c75df800e0121b1920182820cbc39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Jul 2007 13:58:33 -0400 Subject: NFSv4: Reduce the chances of an open_owner identifier collision Currently we just use a 32-bit counter. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 5 +++-- include/linux/nfs_xdr.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2cef0a68aa77..0cac49bc0955 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -34,7 +34,8 @@ struct nfs_client { nfs4_verifier cl_confirm; unsigned long cl_state; - u32 cl_lockowner_id; + struct rb_root cl_openowner_id; + struct rb_root cl_lockowner_id; /* * The following rwsem ensures exclusive access to the server @@ -43,7 +44,7 @@ struct nfs_client { struct rw_semaphore cl_sem; struct list_head cl_delegations; - struct list_head cl_state_owners; + struct rb_root cl_state_owners; spinlock_t cl_lock; unsigned long cl_lease_time; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f7100df3a690..38d77681cf27 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -119,7 +119,7 @@ struct nfs_openargs { struct nfs_seqid * seqid; int open_flags; __u64 clientid; - __u32 id; + __u64 id; union { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ @@ -181,7 +181,7 @@ struct nfs_closeres { * */ struct nfs_lowner { __u64 clientid; - u32 id; + __u64 id; }; struct nfs_lock_args { -- cgit v1.2.3 From 412c77cee6d6e73fbe1dc3d67f52163efed33fc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jul 2007 16:10:55 -0400 Subject: NFSv4: Defer inode revalidation when setting up a delegation Currently we force a synchronous call to __nfs_revalidate_inode() in nfs_inode_set_delegation(). This not only ensures that we cannot call nfs_inode_set_delegation from an asynchronous context, but it also slows down any call to open(). Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cf395351cdd4..e94971040de9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -184,6 +184,7 @@ struct nfs_inode { #define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ #define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ +#define NFS_INO_REVAL_FORCED 0x0040 /* force revalidation ignoring a delegation */ /* * Bit offsets in flags field -- cgit v1.2.3 From 433c92379d9c2c59c2ebc7628fe4fb02cfc2daf8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:14 -0400 Subject: NFS: Clean up nfs_size_to_loff_t() Use the same file size limit that lockd uses. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e94971040de9..7deb5b0347f7 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -503,12 +503,10 @@ extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *, * inline functions */ -static inline loff_t -nfs_size_to_loff_t(__u64 size) +static inline loff_t nfs_size_to_loff_t(__u64 size) { - loff_t maxsz = (((loff_t) ULONG_MAX) << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE - 1; - if (size > maxsz) - return maxsz; + if (size > (__u64) OFFSET_MAX - 1) + return OFFSET_MAX - 1; return (loff_t) size; } -- cgit v1.2.3 From 5680d48be88d12cd987e5579a6072a4ca34ca6ea Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:24 -0400 Subject: NFS: Clean-up: Define macros for maximum host and export path name lengths Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index cc8b9c59acb8..0b82a17c705b 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -37,7 +37,7 @@ struct nfs_mount_data { int acdirmin; /* 1 */ int acdirmax; /* 1 */ struct sockaddr_in addr; /* 1 */ - char hostname[256]; /* 1 */ + char hostname[NFS_MAXNAMLEN + 1]; /* 1 */ int namlen; /* 2 */ unsigned int bsize; /* 3 */ struct nfs3_fh root; /* 4 */ -- cgit v1.2.3 From f18289931d705f9c4634b361341a1677bea97aca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:12:51 -0400 Subject: NFS: Add a new NFS debugging flag just for mount processing Note to self: fix up /usr/sbin/rpcdebug too Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7deb5b0347f7..04f659f1e560 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -553,6 +553,7 @@ extern void * nfs_root_data(void); #define NFSDBG_ROOT 0x0080 #define NFSDBG_CALLBACK 0x0100 #define NFSDBG_CLIENT 0x0200 +#define NFSDBG_MOUNT 0x0400 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ -- cgit v1.2.3 From cce63cd6374e6f1b4ea897ece1454feb13993d7c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:12 -0400 Subject: SUNRPC: Rename rpcb_getport_external routine In preparation for handling NFS mount option parsing in the kernel, rename rpcb_getport_external as rpcb_get_port_sync, and make it available always (instead of only when CONFIG_ROOT_NFS is enabled). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 097984b03857..b28d919c7758 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -120,8 +120,10 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); + int rpcb_register(u32, u32, int, unsigned short, int *); void rpcb_getport(struct rpc_task *); +int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); @@ -141,10 +143,5 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -/* - * Helper function for NFSroot support - */ -int rpcb_getport_external(struct sockaddr_in *, __u32, __u32, int); - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 45160d6275814e0c86206e6981f0b92c61a50a21 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:17 -0400 Subject: SUNRPC: Rename rpcb_getport to be consistent with new rpcb_getport_sync name Clean up, for consistency. Rename rpcb_getport as rpcb_getport_async, to match the naming scheme of rpcb_getport_sync. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b28d919c7758..c1b37972b0d5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -122,8 +122,8 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); -void rpcb_getport(struct rpc_task *); int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int); +void rpcb_getport_async(struct rpc_task *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); -- cgit v1.2.3 From 3ea97309e6b18bce200211b3f9188e8023321adc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:27 -0400 Subject: NFS: Remake nfsroot_mount as a permanent part of NFS client In preparation for supporting NFSv2 and NFSv3 mount option handling in the kernel NFS client, convert mount_clnt.c to be a permanent part of the NFS client, instead of built only when CONFIG_ROOT_NFS is enabled. In addition, we also replace the "struct sockaddr_in *" argument with something more generic, to help support IPv6 at some later point. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 04f659f1e560..c098ae194f79 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -494,10 +494,9 @@ static inline void nfs3_forget_cached_acls(struct inode *inode) /* * linux/fs/mount_clnt.c - * (Used only by nfsroot module) */ -extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *, - int, int); +extern int nfs_mount(struct sockaddr *, size_t, char *, char *, + int, int, struct nfs_fh *); /* * inline functions -- cgit v1.2.3 From 8007122520f0a3599bdc4df47358a5d83b2574aa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 1 Jul 2007 12:13:59 -0400 Subject: NFS: Add support for mounting NFSv4 file systems with string options Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs4_mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83f831d..d8d7480e5a47 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,6 @@ struct nfs4_mount_data { #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ -#define NFS4_MOUNT_FLAGMASK 0xFFFF +#define NFS4_MOUNT_FLAGMASK 0x1033 #endif -- cgit v1.2.3 From 75180df2ed467866ada839fe73cf7cc7d75c0a22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 May 2007 16:53:28 -0400 Subject: NFS: Add the mount option "nosharecache" Prior to David Howell's mount changes in 2.6.18, users who mounted different directories which happened to be from the same filesystem on the server would get different super blocks, and hence could choose different mount options. As long as there were no hard linked files that crossed from one subtree to another, this was quite safe. Post the changes, if the two directories are on the same filesystem (have the same 'fsid'), they will share the same super block, and hence the same mount options. Add a flag to allow users to elect not to share the NFS super block with another mount point, even if the fsids are the same. This will allow users to set different mount options for the two different super blocks, as was previously possible. It is still up to the user to ensure that there are no cache coherency issues when doing this, however the default behaviour will be to share super blocks whenever two paths result in the same fsid. Signed-off-by: Trond Myklebust --- include/linux/nfs4_mount.h | 3 ++- include/linux/nfs_mount.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index d8d7480e5a47..a0dcf6655657 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -65,6 +65,7 @@ struct nfs4_mount_data { #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ -#define NFS4_MOUNT_FLAGMASK 0x1033 +#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ +#define NFS4_MOUNT_FLAGMASK 0x9033 #endif diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 0b82a17c705b..a3ade89a64d2 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -62,6 +62,7 @@ struct nfs_mount_data { #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ +#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF #endif -- cgit v1.2.3 From 96802a095171f5b35cf0e1e0d4be943e6696a253 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Sun, 8 Jul 2007 13:08:54 +0200 Subject: SUNRPC: cleanup transport creation argument passing Cleanup argument passing to functions for creating an RPC transport. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 34f7590506fa..ea828b09e4ad 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -17,6 +17,8 @@ #include #include +#ifdef __KERNEL__ + extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -194,7 +196,12 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -#ifdef __KERNEL__ +struct rpc_xprtsock_create { + int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + struct sockaddr * dstaddr; /* remote peer address */ + size_t addrlen; + struct rpc_timeout * timeout; /* optional timeout parameters */ +}; /* * Transport operations used by ULPs @@ -204,7 +211,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); +struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -242,8 +249,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); -struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); +struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); +struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); int init_socket_xprt(void); void cleanup_socket_xprt(void); -- cgit v1.2.3 From a97476926ec061f90b77da478620ea6dc71a3237 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:21:39 +0200 Subject: SUNRPC server: record the destination address of a request Save the destination address of an incoming request over TCP like is done already for UDP. It is necessary later for callbacks by the server. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svcsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index e21dd93ac4b7..a53e0fa855d2 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -59,6 +59,7 @@ struct svc_sock { /* cache of various info for TCP sockets */ void *sk_info_authunix; + struct sockaddr_storage sk_local; /* local address */ struct sockaddr_storage sk_remote; /* remote peer's address */ int sk_remotelen; /* length of address */ }; -- cgit v1.2.3 From d3bc9a1deb8964d774af8535814cb91bf8f6def0 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:23:35 +0200 Subject: SUNRPC client: add interface for binding to a local address In addition to binding to a local privileged port the NFS client should allow binding to a specific local address. This is used by the server for callbacks. The patch adds the necessary interface. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c1b37972b0d5..c0d9d14983b3 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -98,6 +98,7 @@ struct rpc_create_args { int protocol; struct sockaddr *address; size_t addrsize; + struct sockaddr *saddress; struct rpc_timeout *timeout; char *servername; struct rpc_program *program; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ea828b09e4ad..d11cedd14f0f 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -198,6 +198,7 @@ struct rpc_xprt { struct rpc_xprtsock_create { int proto; /* IPPROTO_UDP or IPPROTO_TCP */ + struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; struct rpc_timeout * timeout; /* optional timeout parameters */ -- cgit v1.2.3 From c98451bdb2f3e6d6cc1e03adad641e9497512b49 Mon Sep 17 00:00:00 2001 From: Frank van Maarseveen Date: Mon, 9 Jul 2007 22:25:29 +0200 Subject: NLM: fix source address of callback to client Use the destination address of the original NLM request as the source address in callbacks to the client. Signed-off-by: Frank van Maarseveen Signed-off-by: Trond Myklebust --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 05707e2fccae..e2d1ce36b367 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -39,6 +39,7 @@ struct nlm_host { struct hlist_node h_hash; /* doubly linked list */ struct sockaddr_in h_addr; /* peer address */ + struct sockaddr_in h_saddr; /* our address (optional) */ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char * h_name; /* remote hostname */ u32 h_version; /* interface version */ -- cgit v1.2.3 From e06e7c615877026544ad7f8b309d1a3706410383 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2007 17:22:39 -0700 Subject: [IPV4]: The scheduled removal of multipath cached routing support. With help from Chris Wedgwood. Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 - include/linux/ip_mp_alg.h | 22 ---------------------- include/linux/rtnetlink.h | 2 +- 3 files changed, 1 insertion(+), 24 deletions(-) delete mode 100644 include/linux/ip_mp_alg.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index afae306b177c..d94451682761 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -91,7 +91,6 @@ header-y += in6.h header-y += in_route.h header-y += ioctl.h header-y += ipmi_msgdefs.h -header-y += ip_mp_alg.h header-y += ipsec.h header-y += ipx.h header-y += irda.h diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h deleted file mode 100644 index e234e2008f5d..000000000000 --- a/include/linux/ip_mp_alg.h +++ /dev/null @@ -1,22 +0,0 @@ -/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. - * - * Copyright (C) 2004, 2005 Einar Lueck - * Copyright (C) 2005 David S. Miller - */ - -#ifndef _LINUX_IP_MP_ALG_H -#define _LINUX_IP_MP_ALG_H - -enum ip_mp_alg { - IP_MP_ALG_NONE, - IP_MP_ALG_RR, - IP_MP_ALG_DRR, - IP_MP_ALG_RANDOM, - IP_MP_ALG_WRANDOM, - __IP_MP_ALG_MAX -}; - -#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) - -#endif /* _LINUX_IP_MP_ALG_H */ - diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1fae30af91f3..612785848532 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -261,7 +261,7 @@ enum rtattr_type_t RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, - RTA_MP_ALGO, + RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX }; -- cgit v1.2.3 From 8c7b7faaa630fef7f68d8728cee1cce398cc9697 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2007 22:08:12 -0700 Subject: [NET]: Kill eth_copy_and_sum(). It hasn't "summed" anything in over 7 years, and it's just a straight mempcy ala skb_copy_to_linear_data() so just get rid of it. Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 071c67abed86..f48eb89efd0f 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -40,12 +40,6 @@ extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); extern struct net_device *alloc_etherdev(int sizeof_priv); -static inline void eth_copy_and_sum (struct sk_buff *dest, - const unsigned char *src, - int len, int base) -{ - memcpy (dest->data, src, len); -} /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. -- cgit v1.2.3 From 6472ce6096bf27d85a1f2580964a36f290bd60a9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:21 -0700 Subject: [NET]: Mark struct net_device * argument to netdev_priv const Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a70f553b28f..94cc77cd3aa3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -546,7 +546,7 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) -static inline void *netdev_priv(struct net_device *dev) +static inline void *netdev_priv(const struct net_device *dev) { return (char *)dev + ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) -- cgit v1.2.3 From 38f7b870d4a6a5d3ec21557e849620cb7d032965 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:03:51 -0700 Subject: [RTNETLINK]: Link creation API Add rtnetlink API for creating, changing and deleting software devices. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 13 +++++++++++++ include/linux/netdevice.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 604c2434f71c..3144babd2357 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -76,6 +76,8 @@ enum #define IFLA_WEIGHT IFLA_WEIGHT IFLA_OPERSTATE, IFLA_LINKMODE, + IFLA_LINKINFO, +#define IFLA_LINKINFO IFLA_LINKINFO __IFLA_MAX }; @@ -140,4 +142,15 @@ struct ifla_cacheinfo __u32 retrans_time; }; +enum +{ + IFLA_INFO_UNSPEC, + IFLA_INFO_KIND, + IFLA_INFO_DATA, + IFLA_INFO_XSTATS, + __IFLA_INFO_MAX, +}; + +#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) + #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94cc77cd3aa3..e7913ee5581c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -540,6 +540,9 @@ struct net_device struct device dev; /* space for optional statistics and wireless sysfs groups */ struct attribute_group *sysfs_groups[3]; + + /* rtnetlink link ops */ + const struct rtnl_link_ops *rtnl_link_ops; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 734423cf38021966a5d3bd5f5c6aaecaf32fb4ac Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:07 -0700 Subject: [VLAN]: Use 32 bit value for skb->priority mapping skb->priority has only 32 bits and even VLAN uses 32 bit values in its API. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 81e9bc93569b..aeddb49193f9 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -99,7 +99,7 @@ static inline void vlan_group_set_device(struct vlan_group *vg, int vlan_id, } struct vlan_priority_tci_mapping { - unsigned long priority; + u32 priority; unsigned short vlan_qos; /* This should be shifted when first set, so we only do it * at provisioning time. * ((skb->priority << 13) & 0xE000) @@ -112,7 +112,7 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ - unsigned long ingress_priority_map[8]; + u32 ingress_priority_map[8]; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ -- cgit v1.2.3 From b020cb488586f982f40eb257a32e92a4de710d65 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:22 -0700 Subject: [VLAN]: Keep track of number of QoS mappings Keep track of the number of configured ingress/egress QoS mappings to avoid iteration while calculating the netlink attribute size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index aeddb49193f9..b46d4225f74e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -112,7 +112,10 @@ struct vlan_dev_info { /** This will be the mapping that correlates skb->priority to * 3 bits of VLAN QOS tags... */ + unsigned int nr_ingress_mappings; u32 ingress_priority_map[8]; + + unsigned int nr_egress_mappings; struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ unsigned short vlan_id; /* The VLAN Identifier for this interface. */ -- cgit v1.2.3 From a4bf3af4ac46802436d352ef409cee4fe80445b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:37 -0700 Subject: [VLAN]: Introduce symbolic constants for flag values Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b46d4225f74e..c7912876a210 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -398,6 +398,10 @@ enum vlan_ioctl_cmds { GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */ }; +enum vlan_flags { + VLAN_FLAG_REORDER_HDR = 0x1, +}; + enum vlan_name_types { VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */ VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */ -- cgit v1.2.3 From 07b5b17e157b7018d0ca40ca0d1581a23096fb45 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 13 Jun 2007 12:07:54 -0700 Subject: [VLAN]: Use rtnl_link API Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_link.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 3144babd2357..422084d18ce1 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -153,4 +153,38 @@ enum #define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) +/* VLAN section */ + +enum +{ + IFLA_VLAN_UNSPEC, + IFLA_VLAN_ID, + IFLA_VLAN_FLAGS, + IFLA_VLAN_EGRESS_QOS, + IFLA_VLAN_INGRESS_QOS, + __IFLA_VLAN_MAX, +}; + +#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) + +struct ifla_vlan_flags { + __u32 flags; + __u32 mask; +}; + +enum +{ + IFLA_VLAN_QOS_UNSPEC, + IFLA_VLAN_QOS_MAPPING, + __IFLA_VLAN_QOS_MAX +}; + +#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) + +struct ifla_vlan_qos_mapping +{ + __u32 from; + __u32 to; +}; + #endif /* _LINUX_IF_LINK_H */ -- cgit v1.2.3 From f1c91da44728fba24927e44056a56e507c11cf7b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 16 Jun 2007 12:38:51 -0300 Subject: [KTIME]: Introduce ktime_us_delta This provides a reusable time difference function which returns the difference in microseconds, as often used in the DCCP code. Commiter note: renamed ktime_delta to ktime_us_delta and put it in ktime.h. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 2b139f66027f..923665958f90 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -279,6 +279,11 @@ static inline s64 ktime_to_us(const ktime_t kt) return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; } +static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) +{ + return ktime_to_us(ktime_sub(later, earlier)); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From 1e180f726a58089d15637b5495fecbad8c50c833 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 16 Jun 2007 12:39:38 -0300 Subject: [KTIME]: Introduce ktime_add_us Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 923665958f90..dae7143644fe 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -284,6 +284,11 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) return ktime_to_us(ktime_sub(later, earlier)); } +static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) +{ + return ktime_add_ns(kt, usec * 1000); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From 334a8132d9950f769f390f0f35c233d099688e7a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 04:35:20 -0700 Subject: [SKBUFF]: Keep track of writable header len of headerless clones Currently NAT (and others) that want to modify cloned skbs copy them, even if in the vast majority of cases its not necessary because the skb is a clone made by TCP and the portion NAT wants to modify is actually writable because TCP release the header reference before cloning. The problem is that there is no clean way for NAT to find out how long the writable header area is, so this patch introduces skb->hdr_len to hold this length. When a headerless skb is cloned skb->hdr_len is set to the current headroom, for regular clones it is copied from the original. A new function skb_clone_writable(skb, len) returns whether the skb is writable up to len bytes from skb->data. To avoid enlarging the skb the mac_len field is reduced to 16 bit and the new hdr_len field is put in the remaining 16 bit. I've done a few rough benchmarks of NAT (not with this exact patch, but a very similar one). As expected it saves huge amounts of system time in case of sendfile, bringing it down to basically the same amount as without NAT, with sendmsg it only helps on loopback, probably because of the large MTU. Transmit a 1GB file using sendfile/sendmsg over eth0/lo with and without NAT: - sendfile eth0, no NAT: sys 0m0.388s - sendfile eth0, NAT: sys 0m1.835s - sendfile eth0: NAT + path: sys 0m0.370s (~ -80%) - sendfile lo, no NAT: sys 0m0.258s - sendfile lo, NAT: sys 0m2.609s - sendfile lo, NAT + patch: sys 0m0.260s (~ -90%) - sendmsg eth0, no NAT: sys 0m2.508s - sendmsg eth0, NAT: sys 0m2.539s - sendmsg eth0, NAT + patch: sys 0m2.445s (no change) - sendmsg lo, no NAT: sys 0m2.151s - sendmsg lo, NAT: sys 0m3.557s - sendmsg lo, NAT + patch: sys 0m2.159s (~ -40%) I expect other users can see a similar performance improvement, packet mangling iptables targets, ipip and ip_gre come to mind .. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f0b2f7d0010..881fe80f01d0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -147,8 +147,8 @@ struct skb_shared_info { /* We divide dataref into two halves. The higher 16 bits hold references * to the payload part of skb->data. The lower 16 bits hold references to - * the entire skb->data. It is up to the users of the skb to agree on - * where the payload starts. + * the entire skb->data. A clone of a headerless skb holds the length of + * the header in skb->hdr_len. * * All users must obey the rule that the skb->data reference count must be * greater than or equal to the payload reference count. @@ -206,6 +206,7 @@ typedef unsigned char *sk_buff_data_t; * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header + * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored @@ -260,8 +261,9 @@ struct sk_buff { char cb[48]; unsigned int len, - data_len, - mac_len; + data_len; + __u16 mac_len, + hdr_len; union { __wsum csum; struct { @@ -1321,6 +1323,20 @@ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } +/** + * skb_clone_writable - is the header of a clone writable + * @skb: buffer to check + * @len: length up to which to write + * + * Returns true if modifying the header part of the cloned buffer + * does not requires the data to be copied. + */ +static inline int skb_clone_writable(struct sk_buff *skb, int len) +{ + return !skb_header_cloned(skb) && + skb_headroom(skb) + len <= skb->hdr_len; +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow -- cgit v1.2.3 From afdc3238ec948531205f5c5f77d2de7bae519c71 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 25 Jun 2007 14:30:16 -0700 Subject: [RTNETLINK]: Add nested compat attribute Add a nested compat attribute type that can be used to convert attributes that contain a structure to nested attributes in a backwards compatible way. The attribute looks like this: struct { [ compat contents ] struct rtattr { .rta_len = total size, .rta_type = type, } rta; struct old_structure struct; [ nested top-level attribute ] struct rtattr { .rta_len = nest size, .rta_type = type, } nest_attr; [ optional 0 .. n nested attributes ] struct rtattr { .rta_len = private attribute len, .rta_type = private attribute typ, } nested_attr; struct nested_data data; }; Since both userspace and kernel deal correctly with attributes that are larger than expected old versions will just parse the compat part and ignore the rest. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 612785848532..6731e7f4cc0f 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,6 +570,8 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); +extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, void **data, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) @@ -638,6 +640,18 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ (skb)->len; }) +#define RTA_NEST_COMPAT(skb, type, attrlen, data) \ +({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ + RTA_PUT(skb, type, attrlen, data); \ + RTA_NEST(skb, type); \ + __start; }) + +#define RTA_NEST_COMPAT_END(skb, start) \ +({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \ + (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ + RTA_NEST_END(skb, __nest); \ + (skb)->len; }) + #define RTA_NEST_CANCEL(skb, start) \ ({ if (start) \ skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ -- cgit v1.2.3 From 2371baa4bdab3268b32009926f75e7a5d3a41506 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 26 Jun 2007 03:23:44 -0700 Subject: [RTNETLINK]: Fix rtnetlink compat attribute patch Sent the wrong patch previously. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 6731e7f4cc0f..c91476ce314a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -570,12 +570,16 @@ static __inline__ int rtattr_strcmp(const struct rtattr *rta, const char *str) } extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); -extern int rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, - struct rtattr *rta, void **data, int len); +extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, + struct rtattr *rta, int len); #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) +#define rtattr_parse_nested_compat(tb, max, rta, data, len) \ +({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ + __rtattr_parse_nested_compat(tb, max, rta, len); }) + extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, -- cgit v1.2.3 From 59fbb3a61e02deaeaa4fb50792217921f3002d64 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Tue, 26 Jun 2007 23:56:32 -0700 Subject: [IPV6] MIP6: Loadable module support for MIPv6. This patch makes MIPv6 loadable module named "mip6". Here is a modprobe.conf(5) example to load it automatically when user application uses XFRM state for MIPv6: alias xfrm-type-10-43 mip6 alias xfrm-type-10-60 mip6 Some MIPv6 feature is not included by this modular, however, it should not be affected to other features like either IPsec or IPv6 with and without the patch. We may discuss XFRM, MH (RAW socket) and ancillary data/sockopt separately for future work. Loadable features: * MH receiving check (to send ICMP error back) * RO header parsing and building (i.e. RH2 and HAO in DSTOPTS) * XFRM policy/state database handling for RO These are NOT covered as loadable: * Home Address flags and its rule on source address selection * XFRM sub policy (depends on its own kernel option) * XFRM functions to receive RO as IPv6 extension header * MH sending/receiving through raw socket if user application opens it (since raw socket allows to do so) * RH2 sending as ancillary data * RH2 operation with setsockopt(2) Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 648bd1f0912d..213b63be3c8f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -247,7 +247,7 @@ struct inet6_skb_parm { __u16 lastopt; __u32 nhoff; __u16 flags; -#ifdef CONFIG_IPV6_MIP6 +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif -- cgit v1.2.3 From d212f87b068c9d72065ef579d85b5ee6b8b59381 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 27 Jun 2007 00:47:37 -0700 Subject: [NET]: IPV6 checksum offloading in network devices The existing model for checksum offload does not correctly handle devices that can offload IPV4 and IPV6 only. The NETIF_F_HW_CSUM flag implies device can do any arbitrary protocol. This patch: * adds NETIF_F_IPV6_CSUM for those devices * fixes bnx2 and tg3 devices that need it * add NETIF_F_IPV6_CSUM to ipv6 output (incl GSO) * fixes assumptions about NETIF_F_ALL_CSUM in nat * adjusts bridge union of checksumming computation Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7913ee5581c..7a8f22fb4eee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,9 +314,10 @@ struct net_device /* Net device features */ unsigned long features; #define NETIF_F_SG 1 /* Scatter/gather IO. */ -#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ +#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ +#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ @@ -338,8 +339,11 @@ struct net_device /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) +#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) +#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) +#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) struct net_device *next_sched; -- cgit v1.2.3 From bf742482d7a647c5c6f03f78eb35a862e159ecf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:19 -0700 Subject: [NET]: dev: introduce generic net_device address lists Introduce struct dev_addr_list and list maintenance functions based on dev_mc_list and the related functions. This will be used by follow-up patches for both multicast and secondary unicast addresses. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7a8f22fb4eee..aa389c77aa3e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -177,6 +177,14 @@ struct netif_rx_stats DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat); +struct dev_addr_list +{ + struct dev_addr_list *next; + u8 da_addr[MAX_ADDR_LEN]; + u8 da_addrlen; + int da_users; + int da_gusers; +}; /* * We tag multicasts with these structures. @@ -1008,6 +1016,9 @@ extern void dev_mc_upload(struct net_device *dev); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); +extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); -- cgit v1.2.3 From 3fba5a8b1e3df2384b90493538161e83cf15dd5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:26:58 -0700 Subject: [NET]: dev_mcast: switch to generic net_device address lists Use generic net_device address lists for multicast list handling. Some defines are used to keep drivers working. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aa389c77aa3e..9e114e77e54d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -189,15 +189,12 @@ struct dev_addr_list /* * We tag multicasts with these structures. */ - -struct dev_mc_list -{ - struct dev_mc_list *next; - __u8 dmi_addr[MAX_ADDR_LEN]; - unsigned char dmi_addrlen; - int dmi_users; - int dmi_gusers; -}; + +#define dev_mc_list dev_addr_list +#define dmi_addr da_addr +#define dmi_addrlen da_addrlen +#define dmi_users da_users +#define dmi_gusers da_gusers struct hh_cache { @@ -400,7 +397,7 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ - struct dev_mc_list *mc_list; /* Multicast mac addresses */ + struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; int allmulti; -- cgit v1.2.3 From 4417da668c0021903464f92db278ddae348e0299 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Jun 2007 01:28:10 -0700 Subject: [NET]: dev: secondary unicast address support Add support for configuring secondary unicast addresses on network devices. To support this devices capable of filtering multiple unicast addresses need to change their set_multicast_list function to configure unicast filters as well and assign it to dev->set_rx_mode instead of dev->set_multicast_list. Other devices are put into promiscous mode when secondary unicast addresses are present. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e114e77e54d..2c0cc19edfb2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -397,6 +397,9 @@ struct net_device unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ + struct dev_addr_list *uc_list; /* Secondary unicast mac addresses */ + int uc_count; /* Number of installed ucasts */ + int uc_promisc; struct dev_addr_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ int promiscuity; @@ -502,6 +505,8 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_SET_RX_MODE + void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST void (*set_multicast_list)(struct net_device *dev); #define HAVE_SET_MAC_ADDR @@ -1008,8 +1013,11 @@ extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, void (*setup)(struct net_device *)); extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); -/* Functions used for multicast support */ -extern void dev_mc_upload(struct net_device *dev); +/* Functions used for secondary unicast and multicast support */ +extern void dev_set_rx_mode(struct net_device *dev); +extern void __dev_set_rx_mode(struct net_device *dev); +extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); +extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); -- cgit v1.2.3 From 342f0234c71b40da785dd6a7ce1dd481ecbfdb81 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 27 Jun 2007 15:37:46 -0700 Subject: [UDP]: Introduce UDP encapsulation type for L2TP This patch adds a new UDP_ENCAP_L2TPINUDP encapsulation type for UDP sockets. When a UDP socket's encap_type is UDP_ENCAP_L2TPINUDP, the skb is delivered to a function pointed to by the udp_sock's encap_rcv funcptr. If the skb isn't wanted by L2TP, it returns >0, which causes it to be passed through to UDP. Include padding to put the new encap_rcv field on a 4-byte boundary. Previously, the only user of UDP encap sockets was ESP, so when CONFIG_XFRM was not defined, some of the encap code was compiled out. This patch changes that. As a result, udp_encap_rcv() will now do a little more work when CONFIG_XFRM is not defined. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 6de445c31a64..8ec703f462da 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -42,6 +42,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ +#define UDP_ENCAP_L2TPINUDP 3 /* rfc2661 */ #ifdef __KERNEL__ #include @@ -70,6 +71,11 @@ struct udp_sock { #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ + __u8 unused[3]; + /* + * For encapsulation sockets. + */ + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); }; static inline struct udp_sock *udp_sk(const struct sock *sk) -- cgit v1.2.3 From cf14a4d06742d59ecb2d837a3f53bb24d1ff9acb Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 27 Jun 2007 15:43:43 -0700 Subject: [L2TP]: Changes to existing ppp and socket kernel headers for L2TP Add struct sockaddr_pppol2tp to carry L2TP-specific address information for the PPPoX (PPPoL2TP) socket. Unfortunately we can't use the union inside struct sockaddr_pppox because the L2TP-specific data is larger than the current size of the union and we must preserve the size of struct sockaddr_pppox for binary compatibility. Also add a PPPIOCGL2TPSTATS ioctl to allow userspace to obtain L2TP counters and state from the kernel. Add new if_pppol2tp.h header. [ Modified to use aligned_u64 in statistics structure -DaveM ] Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/if_ppp.h | 16 +++++++++++ include/linux/if_pppol2tp.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 16 +++++++++-- include/linux/socket.h | 1 + 5 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 include/linux/if_pppol2tp.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index d94451682761..127d2d192b5a 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -225,6 +225,7 @@ unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_link.h +unifdef-y += if_pppol2tp.h unifdef-y += if_pppox.h unifdef-y += if_shaper.h unifdef-y += if_tr.h diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h index 768372f07caa..0f2f70d4e48c 100644 --- a/include/linux/if_ppp.h +++ b/include/linux/if_ppp.h @@ -110,6 +110,21 @@ struct ifpppcstatsreq { struct ppp_comp_stats stats; }; +/* For PPPIOCGL2TPSTATS */ +struct pppol2tp_ioc_stats { + __u16 tunnel_id; /* redundant */ + __u16 session_id; /* if zero, get tunnel stats */ + __u32 using_ipsec:1; /* valid only for session_id == 0 */ + aligned_u64 tx_packets; + aligned_u64 tx_bytes; + aligned_u64 tx_errors; + aligned_u64 rx_packets; + aligned_u64 rx_bytes; + aligned_u64 rx_seq_discards; + aligned_u64 rx_oos_packets; + aligned_u64 rx_errors; +}; + #define ifr__name b.ifr_ifrn.ifrn_name #define stats_ptr b.ifr_ifru.ifru_data @@ -146,6 +161,7 @@ struct ifpppcstatsreq { #define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */ #define PPPIOCATTCHAN _IOW('t', 56, int) /* attach to ppp channel */ #define PPPIOCGCHAN _IOR('t', 55, int) /* get ppp channel number */ +#define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats) #define SIOCGPPPSTATS (SIOCDEVPRIVATE + 0) #define SIOCGPPPVER (SIOCDEVPRIVATE + 1) /* NEVER change this!! */ diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h new file mode 100644 index 000000000000..516203b6fdeb --- /dev/null +++ b/include/linux/if_pppol2tp.h @@ -0,0 +1,69 @@ +/*************************************************************************** + * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661) + * + * This file supplies definitions required by the PPP over L2TP driver + * (pppol2tp.c). All version information wrt this file is located in pppol2tp.c + * + * License: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef __LINUX_IF_PPPOL2TP_H +#define __LINUX_IF_PPPOL2TP_H + +#include + +#ifdef __KERNEL__ +#include +#endif + +/* Structure used to connect() the socket to a particular tunnel UDP + * socket. + */ +struct pppol2tp_addr +{ + pid_t pid; /* pid that owns the fd. + * 0 => current */ + int fd; /* FD of UDP socket to use */ + + struct sockaddr_in addr; /* IP address and port to send to */ + + __be16 s_tunnel, s_session; /* For matching incoming packets */ + __be16 d_tunnel, d_session; /* For sending outgoing packets */ +}; + +/* Socket options: + * DEBUG - bitmask of debug message categories + * SENDSEQ - 0 => don't send packets with sequence numbers + * 1 => send packets with sequence numbers + * RECVSEQ - 0 => receive packet sequence numbers are optional + * 1 => drop receive packets without sequence numbers + * LNSMODE - 0 => act as LAC. + * 1 => act as LNS. + * REORDERTO - reorder timeout (in millisecs). If 0, don't try to reorder. + */ +enum { + PPPOL2TP_SO_DEBUG = 1, + PPPOL2TP_SO_RECVSEQ = 2, + PPPOL2TP_SO_SENDSEQ = 3, + PPPOL2TP_SO_LNSMODE = 4, + PPPOL2TP_SO_REORDERTO = 5, +}; + +/* Debug message categories for the DEBUG socket option */ +enum { + PPPOL2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if + * compiled in) */ + PPPOL2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel + * interface */ + PPPOL2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ + PPPOL2TP_MSG_DATA = (1 << 3), /* data packets */ +}; + + + +#endif diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 6f987be60fe2..25652545ba6e 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -27,6 +27,7 @@ #include #include #endif /* __KERNEL__ */ +#include /* For user-space programs to pick up these definitions * which they wouldn't get otherwise without defining __KERNEL__ @@ -50,8 +51,9 @@ struct pppoe_addr{ * Protocols supported by AF_PPPOX */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ -#define PX_MAX_PROTO 1 - +#define PX_PROTO_OL2TP 1 /* Now L2TP also */ +#define PX_MAX_PROTO 2 + struct sockaddr_pppox { sa_family_t sa_family; /* address family, AF_PPPOX */ unsigned int sa_protocol; /* protocol identifier */ @@ -60,6 +62,16 @@ struct sockaddr_pppox { }sa_addr; }__attribute__ ((packed)); +/* The use of the above union isn't viable because the size of this + * struct must stay fixed over time -- applications use sizeof(struct + * sockaddr_pppox) to fill it. We use a protocol specific sockaddr + * type instead. + */ +struct sockaddr_pppol2tp { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + struct pppol2tp_addr pppol2tp; +}__attribute__ ((packed)); /********************************************************************* * diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e7c9483a6a6..fe195c97a89d 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -287,6 +287,7 @@ struct ucred { #define SOL_NETLINK 270 #define SOL_TIPC 271 #define SOL_RXRPC 272 +#define SOL_PPPOL2TP 273 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From f25f4e44808f0f6c9875d94ef1c41ef86c288eb2 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 6 Jul 2007 13:36:20 -0700 Subject: [CORE] Stack changes to add multiqueue hardware support API Add the multiqueue hardware device support API to the core network stack. Allow drivers to allocate multiple queues and manage them at the netdev level if they choose to do so. Added a new field to sk_buff, namely queue_mapping, for drivers to know which tx_ring to select based on OS classification of the flow. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 3 +- include/linux/netdevice.h | 80 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/skbuff.h | 25 ++++++++++++-- 3 files changed, 99 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f48eb89efd0f..6cdb97365e47 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev extern int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh); -extern struct net_device *alloc_etherdev(int sizeof_priv); +extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); +#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c0cc19edfb2..9817821729c4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -108,6 +108,14 @@ struct wireless_dev; #define MAX_HEADER (LL_MAX_HEADER + 48) #endif +struct net_device_subqueue +{ + /* Give a control state for each queue. This struct may contain + * per-queue locks in the future. + */ + unsigned long state; +}; + /* * Network device statistics. Akin to the 2.0 ether stats but * with byte counters. @@ -331,6 +339,7 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -557,6 +566,10 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + + /* The TX queue control structures */ + unsigned int egress_subqueue_count; + struct net_device_subqueue egress_subqueue[0]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -565,9 +578,7 @@ struct net_device static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ((sizeof(struct net_device) - + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST); + return dev->priv; } #define SET_MODULE_OWNER(dev) do { } while (0) @@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev) return test_bit(__LINK_STATE_START, &dev->state); } +/* + * Routines to manage the subqueues on a device. We only need start + * stop, and a check if it's stopped. All other device management is + * done at the overall netdevice level. + * Also test the device if we're multiqueue. + */ +static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state); +#endif +} + +static inline int netif_subqueue_stopped(const struct net_device *dev, + u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return test_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state); +#else + return 0; +#endif +} + +static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#ifdef CONFIG_NETPOLL_TRAP + if (netpoll_trap()) + return; +#endif + if (test_and_clear_bit(__LINK_STATE_XOFF, + &dev->egress_subqueue[queue_index].state)) + __netif_schedule(dev); +#endif +} + +static inline int netif_is_multiqueue(const struct net_device *dev) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + return (!!(NETIF_F_MULTI_QUEUE & dev->features)); +#else + return 0; +#endif +} /* Use this variant when it is known for sure that it * is executing from interrupt context. @@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev) extern void ether_setup(struct net_device *dev); /* Support for loadable net-drivers */ -extern struct net_device *alloc_netdev(int sizeof_priv, const char *name, - void (*setup)(struct net_device *)); +extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int queue_count); +#define alloc_netdev(sizeof_priv, name, setup) \ + alloc_netdev_mq(sizeof_priv, name, setup, 1) extern int register_netdev(struct net_device *dev); extern void unregister_netdev(struct net_device *dev); /* Functions used for secondary unicast and multicast support */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 881fe80f01d0..2d6a14f5f2f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t; * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @iif: ifindex of device we arrived on * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header @@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t; * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c + * @iif: ifindex of device we arrived on + * @queue_mapping: Queue mapping for multiqueue devices * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @dma_cookie: a cookie to one of several possible DMA operations @@ -246,8 +247,6 @@ struct sk_buff { struct sock *sk; ktime_t tstamp; struct net_device *dev; - int iif; - /* 4 byte hole on 64 bit*/ struct dst_entry *dst; struct sec_path *sp; @@ -290,12 +289,18 @@ struct sk_buff { #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif + + int iif; + __u16 queue_mapping; + #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u16 tc_verd; /* traffic control verdict */ #endif #endif + /* 2 byte hole */ + #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; #endif @@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb) { } #endif +static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + skb->queue_mapping = queue_mapping; +#endif +} + +static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from) +{ +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + to->queue_mapping = from->queue_mapping; +#endif +} + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; -- cgit v1.2.3 From d62733c8e437fdb58325617c4b3331769ba82d70 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Thu, 28 Jun 2007 21:04:31 -0700 Subject: [SCHED]: Qdisc changes and sch_rr added for multiqueue Add the new sch_rr qdisc for multiqueue network device support. Allow sch_prio and sch_rr to be compiled with or without multiqueue hardware support. sch_rr is part of sch_prio, and is referenced from MODULE_ALIAS. This was done since sch_prio and sch_rr only differ in their dequeue routine. Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index d10f35338507..268c51599eb8 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -101,6 +101,15 @@ struct tc_prio_qopt __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ }; +enum +{ + TCA_PRIO_UNSPEC, + TCA_PRIO_MQ, + __TCA_PRIO_MAX +}; + +#define TCA_PRIO_MAX (__TCA_PRIO_MAX - 1) + /* TBF section */ struct tc_tbf_qopt -- cgit v1.2.3 From 61cbc2fca6335be52788773b21efdc52a2750924 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 30 Jun 2007 13:35:52 -0700 Subject: [NET]: Fix secondary unicast/multicast address count maintenance When a reference to an existing address is increased or decreased without hitting zero, the address count is incorrectly adjusted. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9817821729c4..8590d685d935 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1091,8 +1091,8 @@ extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern void dev_mc_discard(struct net_device *dev); -extern int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen, int all); -extern int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int newonly); +extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); +extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); -- cgit v1.2.3 From 8c644623fe7e41f59fe97cdf666cba3cb7ced7d8 Mon Sep 17 00:00:00 2001 From: Guido Guenther Date: Mon, 2 Jul 2007 22:50:25 -0700 Subject: [NET]: Allow group ownership of TUN/TAP devices. Introduce a new syscall TUNSETGROUP for group ownership setting of tap devices. The user now is allowed to send packages if either his euid or his egid matches the one specified via tunctl (via -u or -g respecitvely). If both, gid and uid, are set via tunctl, both have to match. Signed-off-by: Guido Guenther Signed-off-by: Jeff Dike Signed-off-by: David S. Miller --- include/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 88aef7b86ef4..42eb6945b93e 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -36,6 +36,7 @@ struct tun_struct { unsigned long flags; int attached; uid_t owner; + gid_t group; wait_queue_head_t read_wait; struct sk_buff_head readq; @@ -78,6 +79,7 @@ struct tun_struct { #define TUNSETPERSIST _IOW('T', 203, int) #define TUNSETOWNER _IOW('T', 204, int) #define TUNSETLINK _IOW('T', 205, int) +#define TUNSETGROUP _IOW('T', 206, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From 89da1ecf5483e6aa29b456a15ad6d05a6797c5a5 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:54:18 -0700 Subject: [IrDA]: Netlink layer. First IrDA configuration netlink layer implementation. Currently, we only support the set/get mode commands. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irda.h b/include/linux/irda.h index 945ba3110874..35911bd4dbe8 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -216,6 +216,33 @@ struct if_irda_req { #define ifr_dtr ifr_ifru.ifru_line.dtr #define ifr_rts ifr_ifru.ifru_line.rts + +/* IrDA netlink definitions */ +#define IRDA_NL_NAME "irda" +#define IRDA_NL_VERSION 1 + +enum irda_nl_commands { + IRDA_NL_CMD_UNSPEC, + IRDA_NL_CMD_SET_MODE, + IRDA_NL_CMD_GET_MODE, + + __IRDA_NL_CMD_AFTER_LAST +}; +#define IRDA_NL_CMD_MAX (__IRDA_NL_CMD_AFTER_LAST - 1) + +enum nl80211_attrs { + IRDA_NL_ATTR_UNSPEC, + IRDA_NL_ATTR_IFNAME, + IRDA_NL_ATTR_MODE, + + __IRDA_NL_ATTR_AFTER_LAST +}; +#define IRDA_NL_ATTR_MAX (__IRDA_NL_ATTR_AFTER_LAST - 1) + +/* IrDA modes */ +#define IRDA_MODE_PRIMARY 0x1 +#define IRDA_MODE_SECONDARY 0x2 + #endif /* KERNEL_IRDA_H */ -- cgit v1.2.3 From 411725280bd0058ebb83c0e32133b7a94902c3a6 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 2 Jul 2007 22:55:31 -0700 Subject: [IrDA]: Monitor mode. Through the IrDA netlink set mode command, we switch to IrDA monitor mode, where one IrLAP instance receives all the packets on the media, without ever responding to them. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irda.h b/include/linux/irda.h index 35911bd4dbe8..8e3735714c1c 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -242,6 +242,7 @@ enum nl80211_attrs { /* IrDA modes */ #define IRDA_MODE_PRIMARY 0x1 #define IRDA_MODE_SECONDARY 0x2 +#define IRDA_MODE_MONITOR 0x4 #endif /* KERNEL_IRDA_H */ -- cgit v1.2.3 From 7bfe24611671ec76b44281e582b38535e21f01a9 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:14:23 -0700 Subject: [NETFILTER]: ip6_tables: fix explanation of valid upper protocol number This explains the allowed upper protocol numbers. IP6T_F_NOPROTO was introduced to use 0 as Hop-by-Hop option header, not wildcard. But that seemed to be forgotten. 0 has been used as wildcard since 2002-08-23. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 4686f8342cbd..9a720f05888f 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -44,8 +44,14 @@ struct ip6t_ip6 { char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; - /* ARGH, HopByHop uses 0, so can't do 0 = ANY, - instead IP6T_F_NOPROTO must be set */ + /* Upper protocol number + * - The allowed value is 0 (any) or protocol number of last parsable + * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or + * the non IPv6 extension headers. + * - The protocol numbers of IPv6 extension headers except of ESP and + * MH do not match any packets. + * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol. + */ u_int16_t proto; /* TOS to match iff flags & IP6T_F_TOS */ u_int8_t tos; -- cgit v1.2.3 From cff533ac12494fa002e2c46acc94d670e5f636a2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:12 -0700 Subject: [NETFILTER]: x_tables: switch hotdrop to bool Switch the "hotdrop" variables to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7e733a6ba4f6..b8577d18d10d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -148,7 +148,7 @@ struct xt_match const void *matchinfo, int offset, unsigned int protoff, - int *hotdrop); + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From 1d93a9cbad608f6398ba6c5b588c504ccd35a2ca Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:15:35 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->match to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b8577d18d10d..304fce356a43 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -141,14 +141,14 @@ struct xt_match /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ - int (*match)(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop); + bool (*match)(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ -- cgit v1.2.3 From ccb79bdce71f2c04cfa9bfcbaf4d37e2f963d684 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:00 -0700 Subject: [NETFILTER]: x_tables: switch xt_match->checkentry to bool Switch the return type of match functions to boolean Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 304fce356a43..5130dd60a2fc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -152,11 +152,11 @@ struct xt_match /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *ip, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_match *match, void *matchinfo); -- cgit v1.2.3 From e1931b784a8de324abf310fa3b5e3f25d3988233 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:16:26 -0700 Subject: [NETFILTER]: x_tables: switch xt_target->checkentry to bool Switch the return type of target checkentry functions to boolean. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5130dd60a2fc..64f425a855bb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -202,11 +202,11 @@ struct xt_target hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ - int (*checkentry)(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask); + bool (*checkentry)(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targinfo, + unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_target *target, void *targinfo); -- cgit v1.2.3 From 1b50b8a371e90a5e110f466e4ac02cf6b5f681de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 7 Jul 2007 22:20:36 -0700 Subject: [NETFILTER]: Add u32 match Along comes... xt_u32, a revamped ipt_u32 from POM-NG, Plus: * 2007-06-02: added ipv6 support * 2007-06-05: uses kmalloc for the big buffer * 2007-06-05: added inversion * 2007-06-20: use skb_copy_bits() and get rid of the big buffer and lock (suggested by Pablo Neira Ayuso) Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_u32.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/linux/netfilter/xt_u32.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h new file mode 100644 index 000000000000..9947f56cdbdd --- /dev/null +++ b/include/linux/netfilter/xt_u32.h @@ -0,0 +1,40 @@ +#ifndef _XT_U32_H +#define _XT_U32_H 1 + +enum xt_u32_ops { + XT_U32_AND, + XT_U32_LEFTSH, + XT_U32_RIGHTSH, + XT_U32_AT, +}; + +struct xt_u32_location_element { + u_int32_t number; + u_int8_t nextop; +}; + +struct xt_u32_value_element { + u_int32_t min; + u_int32_t max; +}; + +/* + * Any way to allow for an arbitrary number of elements? + * For now, I settle with a limit of 10 each. + */ +#define XT_U32_MAXSIZE 10 + +struct xt_u32_test { + struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; + struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; + u_int8_t nnums; + u_int8_t nvalues; +}; + +struct xt_u32 { + struct xt_u32_test tests[XT_U32_MAXSIZE+1]; + u_int8_t ntests; + u_int8_t invert; +}; + +#endif /* _XT_U32_H */ -- cgit v1.2.3 From ba9dda3ab5a865542e69dfe01edb2436857c9420 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Jul 2007 22:21:23 -0700 Subject: [NETFILTER]: x_tables: add TRACE target The TRACE target can be used to follow IP and IPv6 packets through the ruleset. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick NcHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2d6a14f5f2f1..625d73b07ab7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -227,6 +227,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs + * @nf_trace: netfilter packet trace flag * @nfctinfo: Relationship of this skb to the connection * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -278,7 +279,8 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + nf_trace:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); -- cgit v1.2.3 From d3c3f4243e135b3d8c41d98be0cb2f54a4141abf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:38:30 -0700 Subject: [NETFILTER]: ipt_CLUSTERIP: add compat code Adjust structure size and don't expect pointers passed in from userspace to be valid. Also replace an enum in an ABI structure by a fixed size type. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index d9bceedfb3dc..daf50be22c9d 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -18,13 +18,13 @@ struct clusterip_config; struct ipt_clusterip_tgt_info { u_int32_t flags; - + /* only relevant for new ones */ u_int8_t clustermac[6]; u_int16_t num_total_nodes; u_int16_t num_local_nodes; u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; - enum clusterip_hashmode hash_mode; + u_int32_t hash_mode; u_int32_t hash_initval; struct clusterip_config *config; -- cgit v1.2.3 From 0d53778e81ac7af266dac8a20cc328328c327112 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 7 Jul 2007 22:39:38 -0700 Subject: [NETFILTER]: Convert DEBUGP to pr_debug Convert DEBUGP to pr_debug and fix lots of non-compiling debug statements. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_pptp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 9d8144a488cd..c93061f33144 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -4,6 +4,8 @@ #include +extern const char *pptp_msg_name[]; + /* state of the control session */ enum pptp_ctrlsess_state { PPTP_SESSION_NONE, /* no session present */ -- cgit v1.2.3 From ce7663d84a87bb4e1743f62950bf7dceed723a13 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 7 Jul 2007 22:40:08 -0700 Subject: [NETFILTER]: nfnetlink_queue: don't unregister handler of other subsystem The queue handlers registered by ip[6]_queue.ko at initialization should not be unregistered according to requests from userland program using nfnetlink_queue. If we allow that, there is no way to register the handlers of built-in ip[6]_queue again. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 10b5c6275706..0eed0b7ab2df 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -275,7 +275,8 @@ struct nf_queue_handler { }; extern int nf_register_queue_handler(int pf, struct nf_queue_handler *qh); -extern int nf_unregister_queue_handler(int pf); +extern int nf_unregister_queue_handler(int pf, + struct nf_queue_handler *qh); extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, -- cgit v1.2.3 From c6c6e3e05c0b4349824efcdd36650e7be9d5c7c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jul 2007 22:41:55 -0700 Subject: [NET]: Update comments for skb checksums Rusty (whose comments we should all study and emulate :) pointed out that our comments for skb checksums are no longer up-to-date. So here is a patch to 1) add the case of partial checksums on input; 2) update partial checksum case to mention csum_start/csum_offset; 3) mention the new IPv6 feature bit. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 625d73b07ab7..9391e4a4c344 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -65,13 +65,20 @@ * is able to produce some skb->csum, it MUST use COMPLETE, * not UNNECESSARY. * + * PARTIAL: identical to the case for output below. This may occur + * on a packet received directly from another Linux OS, e.g., + * a virtualised Linux kernel on the same host. The packet can + * be treated in the same way as UNNECESSARY except that on + * output (i.e., forwarding) the checksum must be filled in + * by the OS or the hardware. + * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->transport_header to the end and to record the checksum - * at skb->transport_header + skb->csum. + * from skb->csum_start to the end and to record the checksum + * at skb->csum_start + skb->csum_offset. * * Device must show its capabilities in dev->features, set * at device setup time. @@ -82,6 +89,7 @@ * TCP/UDP over IPv4. Sigh. Vendors like this * way by an unknown reason. Though, see comment above * about CHECKSUM_UNNECESSARY. 8) + * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. * * Any questions? No questions, good. --ANK */ -- cgit v1.2.3 From bb4dbf9e61d0801927e7df2569bb3dd8287ea301 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 10 Jul 2007 22:55:49 -0700 Subject: [IPV6]: Do not send RH0 anymore. Based on . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 213b63be3c8f..cb3118cf277c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -27,8 +27,8 @@ struct in6_ifreq { int ifr6_ifindex; }; -#define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ -#define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ +#define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ +#define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* -- cgit v1.2.3 From 4c752098f529f41abfc985426a3eca0f2cb96676 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 May 2007 13:28:48 +0900 Subject: [IPV6]: Make IPV6_{RECV,2292}RTHDR boolean options. Because reversing RH0 is no longer supported by deprecation of RH0, let's make IPV6_{RECV,2292}RTHDR boolean options. Boolean are more appropriate from standard POV. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb3118cf277c..97983dc9df13 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -299,8 +299,8 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u16 srcrt:2, - osrcrt:2, + __u16 srcrt:1, + osrcrt:1, rxinfo:1, rxoinfo:1, rxhlim:1, -- cgit v1.2.3 From e69ff734e15eb7f61621f8764ce0a2181823a737 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 8 Jun 2007 16:26:08 +1000 Subject: [CRYPTO] cipher: Remove obsolete fields from cipher_tfm This removes all the unused block cipher fields from cipher_tfm. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 0de7e2ace822..357e8cfedc37 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -295,28 +295,8 @@ struct blkcipher_tfm { }; struct cipher_tfm { - void *cit_iv; - unsigned int cit_ivsize; - u32 cit_mode; int (*cit_setkey)(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); - int (*cit_encrypt)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - int (*cit_encrypt_iv)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes, u8 *iv); - int (*cit_decrypt)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - int (*cit_decrypt_iv)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes, u8 *iv); - void (*cit_xor_block)(u8 *dst, const u8 *src); void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -- cgit v1.2.3 From d556ad4bbe75faf17b239e151a9f003322b2e851 Mon Sep 17 00:00:00 2001 From: Peter Oruba Date: Tue, 15 May 2007 13:59:13 +0200 Subject: PCI: add PCI-X/PCI-Express read control interfaces This patch introduces an interface to read and write PCI-X / PCI-Express maximum read byte count values from PCI config space. There is a second function that returns the maximum _designed_ read byte count, which marks the maximum value for a device, since some drivers try to set MMRBC to the highest allowed value and rely on such a function. Based on patch set by Stephen Hemminger Cc: Stephen Hemminger Signed-off-by: Peter Oruba Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 086a0e5a6318..ac403d74a222 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -111,7 +111,8 @@ enum pcie_reset_state { typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { - PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, + PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, + PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2, }; struct pci_cap_saved_state { @@ -549,6 +550,10 @@ void pci_intx(struct pci_dev *dev, int enable); void pci_msi_off(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); +int pcix_get_max_mmrbc(struct pci_dev *dev); +int pcix_get_mmrbc(struct pci_dev *dev); +int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); +int pcie_set_readrq(struct pci_dev *dev, int rq); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i); -- cgit v1.2.3 From 575e3348cb80c3265278756778d5091d5ca4efbf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 May 2007 12:03:07 +1000 Subject: PCI: Use a weak symbol for the empty version of pcibios_add_platform_entries() I'm not sure if this is going to fly, weak symbols work on the compilers I'm using, but whether they work for all of the affected architectures I can't say. I've cc'ed as many arch maintainers/lists as I could find. But assuming they do, we can use a weak empty definition of pcibios_add_platform_entries() to avoid having an empty definition on every arch. Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index ac403d74a222..18319aba1a57 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -881,5 +881,7 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; +extern void pcibios_add_platform_entries(struct pci_dev *dev); + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From a2cd52ca904f5913651e71764755e712894ccc2f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 May 2007 12:03:08 +1000 Subject: PCI: Make pcibios_add_platform_entries() return errors Currently pcibios_add_platform_entries() returns void, but could fail, so instead have it return an int and propagate errors up to pci_create_sysfs_dev_files(). Fixes: arch/powerpc/kernel/pci_64.c: In function 'pcibios_add_platform_entries': arch/powerpc/kernel/pci_64.c:878: warning: ignoring return value of 'device_create_file', declared with attribute warn_unused_result arch/powerpc/kernel/pci_32.c: In function 'pcibios_add_platform_entries': arch/powerpc/kernel/pci_32.c:1043: warning: ignoring return value of 'device_create_file', declared with attribute warn_unused_result Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 18319aba1a57..483db814770e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -881,7 +881,7 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; -extern void pcibios_add_platform_entries(struct pci_dev *dev); +extern int pcibios_add_platform_entries(struct pci_dev *dev); #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From adf809d01043d8808e47db2d35fc07b53062884e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 21 May 2007 14:16:17 -0700 Subject: + pci_find_slot-mark-deprecated.patch added to -mm tree We've now fixed up most users of pci_find_slot, and the remainder are either hard and need someone with the hardware and info to work on it, or patches exist but are not yet merged. Time therefore for some gentle encouragement Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 483db814770e..4db7b5a18f58 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -476,7 +476,7 @@ extern void pci_sort_breadthfirst(void); /* Generic PCI functions exported to card drivers */ struct pci_dev __deprecated *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from); -struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn); +struct pci_dev __deprecated *pci_find_slot (unsigned int bus, unsigned int devfn); int pci_find_capability (struct pci_dev *dev, int cap); int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability (struct pci_dev *dev, int cap); -- cgit v1.2.3 From 65b3bc358a3195ebe459761a248cf33a61539947 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jun 2007 11:46:49 +0800 Subject: PCI aer: fix stub return values The stubs used when advanced error reporting is not enabled must have same return type as real functions. Signed-off-by: Stephen Hemminger Acked-by: Zhang Yanmin Signed-off-by: Greg Kroah-Hartman --- include/linux/aer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 402e178b38eb..64aacaed8d6c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -14,10 +14,10 @@ extern int pci_find_aer_capability(struct pci_dev *dev); extern int pci_disable_pcie_error_reporting(struct pci_dev *dev); extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); #else -#define pci_enable_pcie_error_reporting(dev) do { } while (0) -#define pci_find_aer_capability(dev) do { } while (0) -#define pci_disable_pcie_error_reporting(dev) do { } while (0) -#define pci_cleanup_aer_uncorrect_error_status(dev) do { } while (0) +#define pci_enable_pcie_error_reporting(dev) (-EINVAL) +#define pci_find_aer_capability(dev) (0) +#define pci_disable_pcie_error_reporting(dev) (-EINVAL) +#define pci_cleanup_aer_uncorrect_error_status(dev) (-EINVAL) #endif #endif //_AER_H_ -- cgit v1.2.3 From f0dce411930d16a678173e534594bca160f5eaff Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jun 2007 11:50:34 +0800 Subject: PCI aer: add pci_cleanup_aer_correct_aer_status Function to clear bogus correctable errors. Analog to pci_aer_uncorrect_are_status. The Marvell chips seem to start out with a bogus value that needs to be cleared. Yanmin ported it to 2.6.22-rc4 by fixing a fuzz patch applying info. Signed-off-by: Stephen Hemminger Acked-by: Zhang Yanmin Signed-off-by: Greg Kroah-Hartman --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 64aacaed8d6c..509656286e53 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -13,11 +13,13 @@ extern int pci_enable_pcie_error_reporting(struct pci_dev *dev); extern int pci_find_aer_capability(struct pci_dev *dev); extern int pci_disable_pcie_error_reporting(struct pci_dev *dev); extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +extern int pci_cleanup_aer_correct_error_status(struct pci_dev *dev); #else #define pci_enable_pcie_error_reporting(dev) (-EINVAL) #define pci_find_aer_capability(dev) (0) #define pci_disable_pcie_error_reporting(dev) (-EINVAL) #define pci_cleanup_aer_uncorrect_error_status(dev) (-EINVAL) +#define pci_cleanup_aer_correct_error_status(dev) (-EINVAL) #endif #endif //_AER_H_ -- cgit v1.2.3 From 56906c612e10b5e32a48ccbe8a3c08ab6acf5a28 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 7 May 2007 10:26:17 -0700 Subject: PCI: remove useless pci driver method Remove pointless and never-called enable_wake() hook from pci_driver and from documentation. Evidently this was introduced in the 2.4.6 kernel, but there's no evidence it was ever called; and it was rarely implemented. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4db7b5a18f58..5be420ac6303 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -371,7 +371,6 @@ struct pci_driver { int (*suspend_late) (struct pci_dev *dev, pm_message_t state); int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ - int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); struct pci_error_handlers *err_handler; -- cgit v1.2.3 From b8a3a5214d7cc115f1ca3a3967b7229d97c46f4a Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Fri, 8 Jun 2007 15:46:30 -0700 Subject: PCI: read revision ID by default Currently there are 97 occurrences where drivers need the pci revision ID. We can do this once for all devices. Even the pci subsystem needs the revision several times for quirks. The extra u8 member pads out nicely in the pci_dev struct. Signed-off-by: Auke Kok Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 5be420ac6303..45332440a2e6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -139,6 +139,7 @@ struct pci_dev { unsigned short subsystem_vendor; unsigned short subsystem_device; unsigned int class; /* 3 bytes: (base,sub,prog-if) */ + u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ -- cgit v1.2.3 From 1d0ed384c1f2582b6f7408642c77a78a0c410122 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 18 Jun 2007 10:58:13 +0200 Subject: PCI: ATM: lanai, change VENDOR to DEVICE lanai, change VENDOR to DEVICE There were 2 bad named macros in pci_ids (LANAI 2 and IHB). Rename it to DEVICE, because it's device id. Also make some cleanpu in pci_device_id table (use PCI_VDEVICE). Cc: Mitchell Blank Jr Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 75c4d4d06892..a260a947c917 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1383,8 +1383,8 @@ #define PCI_VENDOR_ID_EF 0x111a #define PCI_DEVICE_ID_EF_ATM_FPGA 0x0000 #define PCI_DEVICE_ID_EF_ATM_ASIC 0x0002 -#define PCI_VENDOR_ID_EF_ATM_LANAI2 0x0003 -#define PCI_VENDOR_ID_EF_ATM_LANAIHB 0x0005 +#define PCI_DEVICE_ID_EF_ATM_LANAI2 0x0003 +#define PCI_DEVICE_ID_EF_ATM_LANAIHB 0x0005 #define PCI_VENDOR_ID_IDT 0x111d #define PCI_DEVICE_ID_IDT_IDT77201 0x0001 -- cgit v1.2.3 From c43eaa02abf3b034a9694dcca5c177ecb6072f89 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 18 Jun 2007 10:58:14 +0200 Subject: PCI: i386: traps, change VENDOR to DEVICE traps, change VENDOR to DEVICE Change macro for SGI lithium (arch/i386/mach-visws/traps.c) device from VENDOR to DEVICE, because it's a device id. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a260a947c917..228e0befeda1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -944,8 +944,8 @@ #define PCI_VENDOR_ID_SGI 0x10a9 #define PCI_DEVICE_ID_SGI_IOC3 0x0003 +#define PCI_DEVICE_ID_SGI_LITHIUM 0x1002 #define PCI_DEVICE_ID_SGI_IOC4 0x100a -#define PCI_VENDOR_ID_SGI_LITHIUM 0x1002 #define PCI_VENDOR_ID_WINBOND 0x10ad -- cgit v1.2.3 From 03966e097db1a3b7aff5d364277f2e66069923df Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 18 Jun 2007 10:55:30 +0200 Subject: PCI: pci_ids, reorder some entries pci_ids, reorder some entries Some lines are not vendor sorted, reorder it to comply with the rest of document. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 228e0befeda1..0058fb920c74 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -133,6 +133,9 @@ /* Vendors and devices. Sort key: vendor first, device next. */ +#define PCI_VENDOR_ID_TTTECH 0x0357 +#define PCI_DEVICE_ID_TTTECH_MC322 0x000a + #define PCI_VENDOR_ID_DYNALINK 0x0675 #define PCI_DEVICE_ID_DYNALINK_IS64PH 0x1702 @@ -1902,6 +1905,8 @@ #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_VENDOR_ID_CHELSIO 0x1425 + #define PCI_VENDOR_ID_SAMSUNG 0x144d #define PCI_VENDOR_ID_MYRICOM 0x14c1 @@ -2010,8 +2015,6 @@ #define PCI_DEVICE_ID_ENE_720 0x1421 #define PCI_DEVICE_ID_ENE_722 0x1422 -#define PCI_VENDOR_ID_CHELSIO 0x1425 - #define PCI_SUBVENDOR_ID_PERLE 0x155f #define PCI_SUBDEVICE_ID_PCI_RAS4 0xf001 #define PCI_SUBDEVICE_ID_PCI_RAS8 0xf010 @@ -2035,6 +2038,9 @@ #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c #define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_VENDOR_ID_QUICKNET 0x15e2 +#define PCI_DEVICE_ID_QUICKNET_XJ 0x0500 + #define PCI_VENDOR_ID_PDC 0x15e9 @@ -2412,13 +2418,7 @@ #define PCI_DEVICE_ID_TIGERJET_300 0x0001 #define PCI_DEVICE_ID_TIGERJET_100 0x0002 -#define PCI_VENDOR_ID_TTTECH 0x0357 -#define PCI_DEVICE_ID_TTTECH_MC322 0x000A - #define PCI_VENDOR_ID_XILINX_RME 0xea60 #define PCI_DEVICE_ID_RME_DIGI32 0x9896 #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898 - -#define PCI_VENDOR_ID_QUICKNET 0x15E2 -#define PCI_DEVICE_ID_QUICKNET_XJ 0x0500 -- cgit v1.2.3 From f732ee0b71365ddc20e6a0b408f9fd1732d7eb75 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 18 Jun 2007 10:58:14 +0200 Subject: PCI: pci_ids, add atheros and 3com_2 vendors pci_ids, add atheros and 3com_2 vendors Atheros is wifi vendor. 3com_2 (0xa727) is an vendor id for one card with ath chip. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0058fb920c74..0995e97b1ccf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2059,6 +2059,8 @@ #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 #define PCI_DEVICE_ID_BCM1250_HT 0x0002 +#define PCI_VENDOR_ID_ATHEROS 0x168c + #define PCI_VENDOR_ID_NETCELL 0x169c #define PCI_DEVICE_ID_REVOLUTION 0x0044 @@ -2410,6 +2412,8 @@ #define PCI_DEVICE_ID_NETMOS_9845 0x9845 #define PCI_DEVICE_ID_NETMOS_9855 0x9855 +#define PCI_VENDOR_ID_3COM_2 0xa727 + #define PCI_SUBVENDOR_ID_EXSYS 0xd84d #define PCI_SUBDEVICE_ID_EXSYS_4014 0x4014 #define PCI_SUBDEVICE_ID_EXSYS_4055 0x4055 -- cgit v1.2.3 From 12bedda9f404c4d34eda6477b0ec32140d83501b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 18 Jun 2007 10:56:52 +0200 Subject: PCI: pci_ids, remove double or more empty lines pci_ids, remove two or more empty lines Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/linux/pci_ids.h | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0995e97b1ccf..93961e9d0308 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -735,7 +735,6 @@ #define PCI_DEVICE_ID_ELSA_MICROLINK 0x1000 #define PCI_DEVICE_ID_ELSA_QS3000 0x3000 - #define PCI_VENDOR_ID_BUSLOGIC 0x104B #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140 #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040 @@ -781,7 +780,6 @@ #define PCI_VENDOR_ID_SONY 0x104d - /* Winbond have two vendor IDs! See 0x10ad as well */ #define PCI_VENDOR_ID_WINBOND2 0x1050 #define PCI_DEVICE_ID_WINBOND2_89C940F 0x5a5a @@ -819,7 +817,6 @@ #define PCI_DEVICE_ID_PROMISE_20276 0x5275 #define PCI_DEVICE_ID_PROMISE_20277 0x7275 - #define PCI_VENDOR_ID_UMC 0x1060 #define PCI_DEVICE_ID_UMC_UM8673F 0x0101 #define PCI_DEVICE_ID_UMC_UM8886BF 0x673a @@ -835,7 +832,6 @@ #define PCI_DEVICE_ID_MYLEX_DAC960_BA 0xBA56 #define PCI_DEVICE_ID_MYLEX_DAC960_GEM 0xB166 - #define PCI_VENDOR_ID_APPLE 0x106b #define PCI_DEVICE_ID_APPLE_BANDIT 0x0001 #define PCI_DEVICE_ID_APPLE_HYDRA 0x000e @@ -871,7 +867,6 @@ #define PCI_DEVICE_ID_YAMAHA_744 0x0010 #define PCI_DEVICE_ID_YAMAHA_754 0x0012 - #define PCI_VENDOR_ID_QLOGIC 0x1077 #define PCI_DEVICE_ID_QLOGIC_ISP10160 0x1016 #define PCI_DEVICE_ID_QLOGIC_ISP1020 0x1020 @@ -902,12 +897,9 @@ #define PCI_DEVICE_ID_CYRIX_5530_AUDIO 0x0103 #define PCI_DEVICE_ID_CYRIX_5530_VIDEO 0x0104 - - #define PCI_VENDOR_ID_CONTAQ 0x1080 #define PCI_DEVICE_ID_CONTAQ_82C693 0xc693 - #define PCI_VENDOR_ID_OLICOM 0x108d #define PCI_DEVICE_ID_OLICOM_OC2325 0x0012 #define PCI_DEVICE_ID_OLICOM_OC2183 0x0013 @@ -939,23 +931,19 @@ #define PCI_DEVICE_ID_SII_3112 0x3112 #define PCI_DEVICE_ID_SII_1210SA 0x0240 - #define PCI_VENDOR_ID_BROOKTREE 0x109e #define PCI_DEVICE_ID_BROOKTREE_878 0x0878 #define PCI_DEVICE_ID_BROOKTREE_879 0x0879 - #define PCI_VENDOR_ID_SGI 0x10a9 #define PCI_DEVICE_ID_SGI_IOC3 0x0003 #define PCI_DEVICE_ID_SGI_LITHIUM 0x1002 #define PCI_DEVICE_ID_SGI_IOC4 0x100a - #define PCI_VENDOR_ID_WINBOND 0x10ad #define PCI_DEVICE_ID_WINBOND_82C105 0x0105 #define PCI_DEVICE_ID_WINBOND_83C553 0x0565 - #define PCI_VENDOR_ID_PLX 0x10b5 #define PCI_DEVICE_ID_PLX_R685 0x1030 #define PCI_DEVICE_ID_PLX_ROMULUS 0x106a @@ -989,7 +977,6 @@ #define PCI_DEVICE_ID_3COM_3CR990SVR97 0x9909 #define PCI_DEVICE_ID_3COM_3CR990SVR 0x990a - #define PCI_VENDOR_ID_AL 0x10b9 #define PCI_DEVICE_ID_AL_M1533 0x1533 #define PCI_DEVICE_ID_AL_M1535 0x1535 @@ -1012,18 +999,14 @@ #define PCI_DEVICE_ID_AL_M5451 0x5451 #define PCI_DEVICE_ID_AL_M7101 0x7101 - - #define PCI_VENDOR_ID_NEOMAGIC 0x10c8 #define PCI_DEVICE_ID_NEOMAGIC_NM256AV_AUDIO 0x8005 #define PCI_DEVICE_ID_NEOMAGIC_NM256ZX_AUDIO 0x8006 #define PCI_DEVICE_ID_NEOMAGIC_NM256XL_PLUS_AUDIO 0x8016 - #define PCI_VENDOR_ID_TCONRAD 0x10da #define PCI_DEVICE_ID_TCONRAD_TOKENRING 0x0508 - #define PCI_VENDOR_ID_NVIDIA 0x10de #define PCI_DEVICE_ID_NVIDIA_TNT 0x0020 #define PCI_DEVICE_ID_NVIDIA_TNT2 0x0028 @@ -1244,9 +1227,6 @@ #define PCI_DEVICE_ID_IMS_TT128 0x9128 #define PCI_DEVICE_ID_IMS_TT3D 0x9135 - - - #define PCI_VENDOR_ID_INTERG 0x10ea #define PCI_DEVICE_ID_INTERG_1682 0x1682 #define PCI_DEVICE_ID_INTERG_2000 0x2000 @@ -1265,7 +1245,6 @@ #define PCI_DEVICE_ID_XILINX_HAMMERFALL_DSP 0x3fc5 #define PCI_DEVICE_ID_XILINX_HAMMERFALL_DSP_MADI 0x3fc6 - #define PCI_VENDOR_ID_INIT 0x1101 #define PCI_VENDOR_ID_CREATIVE 0x1102 /* duplicate: ECTIVA */ @@ -1360,7 +1339,6 @@ #define PCI_VENDOR_ID_SIEMENS 0x110A #define PCI_DEVICE_ID_SIEMENS_DSCC4 0x2102 - #define PCI_VENDOR_ID_VORTEX 0x1119 #define PCI_DEVICE_ID_VORTEX_GDT60x0 0x0000 #define PCI_DEVICE_ID_VORTEX_GDT6000B 0x0001 @@ -1395,7 +1373,6 @@ #define PCI_VENDOR_ID_FORE 0x1127 #define PCI_DEVICE_ID_FORE_PCA200E 0x0300 - #define PCI_VENDOR_ID_PHILIPS 0x1131 #define PCI_DEVICE_ID_PHILIPS_SAA7146 0x7146 #define PCI_DEVICE_ID_PHILIPS_SAA9730 0x9730 @@ -1414,7 +1391,6 @@ #define PCI_DEVICE_ID_ZIATECH_5550_HC 0x5550 - #define PCI_VENDOR_ID_SYSKONNECT 0x1148 #define PCI_DEVICE_ID_SYSKONNECT_TR 0x4200 #define PCI_DEVICE_ID_SYSKONNECT_GE 0x4300 @@ -1422,7 +1398,6 @@ #define PCI_DEVICE_ID_SYSKONNECT_9DXX 0x4400 #define PCI_DEVICE_ID_SYSKONNECT_9MXX 0x4500 - #define PCI_VENDOR_ID_DIGI 0x114f #define PCI_DEVICE_ID_DIGI_DF_M_IOM2_E 0x0070 #define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071 @@ -1433,12 +1408,10 @@ #define PCI_DEVICE_ID_NEO_2RJ45 0x00CA #define PCI_DEVICE_ID_NEO_2RJ45PRI 0x00CB - #define PCI_VENDOR_ID_XIRCOM 0x115d #define PCI_DEVICE_ID_XIRCOM_RBM56G 0x0101 #define PCI_DEVICE_ID_XIRCOM_X3201_MDM 0x0103 - #define PCI_VENDOR_ID_SERVERWORKS 0x1166 #define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008 #define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009 @@ -1507,7 +1480,6 @@ #define PCI_DEVICE_ID_ZEITNET_1221 0x0001 #define PCI_DEVICE_ID_ZEITNET_1225 0x0002 - #define PCI_VENDOR_ID_FUJITSU_ME 0x119e #define PCI_DEVICE_ID_FUJITSU_FS155 0x0001 #define PCI_DEVICE_ID_FUJITSU_FS50 0x0003 @@ -1525,28 +1497,23 @@ #define PCI_DEVICE_ID_V3_V960 0x0001 #define PCI_DEVICE_ID_V3_V351 0x0002 - #define PCI_VENDOR_ID_ATT 0x11c1 #define PCI_DEVICE_ID_ATT_VENUS_MODEM 0x480 - #define PCI_VENDOR_ID_SPECIALIX 0x11cb #define PCI_DEVICE_ID_SPECIALIX_IO8 0x2000 #define PCI_DEVICE_ID_SPECIALIX_RIO 0x8000 #define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004 - #define PCI_VENDOR_ID_ANALOG_DEVICES 0x11d4 #define PCI_DEVICE_ID_AD1889JS 0x1889 - #define PCI_DEVICE_ID_SEGA_BBA 0x1234 #define PCI_VENDOR_ID_ZORAN 0x11de #define PCI_DEVICE_ID_ZORAN_36057 0x6057 #define PCI_DEVICE_ID_ZORAN_36120 0x6120 - #define PCI_VENDOR_ID_COMPEX 0x11f6 #define PCI_DEVICE_ID_COMPEX_ENET100VG4 0x0112 @@ -1605,8 +1572,6 @@ #define PCI_DEVICE_ID_3DFX_VOODOO3 0x0005 #define PCI_DEVICE_ID_3DFX_VOODOO5 0x0009 - - #define PCI_VENDOR_ID_AVM 0x1244 #define PCI_DEVICE_ID_AVM_B1 0x0700 #define PCI_DEVICE_ID_AVM_C4 0x0800 @@ -1615,7 +1580,6 @@ #define PCI_DEVICE_ID_AVM_C2 0x1100 #define PCI_DEVICE_ID_AVM_T1 0x1200 - #define PCI_VENDOR_ID_STALLION 0x124d /* Allied Telesyn */ @@ -1638,7 +1602,6 @@ #define PCI_VENDOR_ID_SATSAGEM 0x1267 #define PCI_DEVICE_ID_SATSAGEM_NICCY 0x1016 - #define PCI_VENDOR_ID_ENSONIQ 0x1274 #define PCI_DEVICE_ID_ENSONIQ_CT5880 0x5880 #define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 @@ -1661,7 +1624,6 @@ #define PCI_VENDOR_ID_ALTEON 0x12ae - #define PCI_SUBVENDOR_ID_CONNECT_TECH 0x12c4 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232 0x0001 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232 0x0002 @@ -1692,7 +1654,6 @@ #define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_485 0x0331 #define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485 0x0332 - #define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2 #define PCI_DEVICE_ID_NVIDIA_SGS_RIVA128 0x0018 @@ -1802,7 +1763,6 @@ #define PCI_DEVICE_ID_LMC_SSI 0x0005 #define PCI_DEVICE_ID_LMC_T1 0x0006 - #define PCI_VENDOR_ID_NETGEAR 0x1385 #define PCI_DEVICE_ID_NETGEAR_GA620 0x620a @@ -2019,7 +1979,6 @@ #define PCI_SUBDEVICE_ID_PCI_RAS4 0xf001 #define PCI_SUBDEVICE_ID_PCI_RAS8 0xf010 - #define PCI_VENDOR_ID_SYBA 0x1592 #define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782 #define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783 @@ -2043,7 +2002,6 @@ #define PCI_VENDOR_ID_PDC 0x15e9 - #define PCI_VENDOR_ID_FARSITE 0x1619 #define PCI_DEVICE_ID_FARSITE_T2P 0x0400 #define PCI_DEVICE_ID_FARSITE_T4P 0x0440 @@ -2099,7 +2057,6 @@ #define PCI_DEVICE_ID_HERC_WIN 0x5732 #define PCI_DEVICE_ID_HERC_UNI 0x5832 - #define PCI_VENDOR_ID_SITECOM 0x182d #define PCI_DEVICE_ID_SITECOM_DC105V2 0x3069 @@ -2135,12 +2092,9 @@ #define PCI_DEVICE_ID_3DLABS_PERMEDIA2 0x0007 #define PCI_DEVICE_ID_3DLABS_PERMEDIA2V 0x0009 - #define PCI_VENDOR_ID_AKS 0x416c #define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 - - #define PCI_VENDOR_ID_S3 0x5333 #define PCI_DEVICE_ID_S3_TRIO 0x8811 #define PCI_DEVICE_ID_S3_868 0x8880 @@ -2152,7 +2106,6 @@ #define PCI_VENDOR_ID_DUNORD 0x5544 #define PCI_DEVICE_ID_DUNORD_I3000 0x0001 - #define PCI_VENDOR_ID_DCI 0x6666 #define PCI_DEVICE_ID_DCI_PCCOM4 0x0001 #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 @@ -2396,7 +2349,6 @@ #define PCI_DEVICE_ID_ADAPTEC2_OBSIDIAN 0x0500 #define PCI_DEVICE_ID_ADAPTEC2_SCAMP 0x0503 - #define PCI_VENDOR_ID_HOLTEK 0x9412 #define PCI_DEVICE_ID_HOLTEK_6565 0x6565 -- cgit v1.2.3 From 579082df38839efc5b14aa3f48b8806e3e8dc5c2 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 10 Jul 2007 13:35:05 +0200 Subject: PCI: Fix typo in include/linux/pci.h Signed-off-by: Rolf Eike Beer Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 45332440a2e6..a6657b7f245d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -315,7 +315,7 @@ struct pci_dynids { /* ---------------------------------------------------------------- */ /** PCI Error Recovery System (PCI-ERS). If a PCI device driver provides - * a set fof callbacks in struct pci_error_handlers, then that device driver + * a set of callbacks in struct pci_error_handlers, then that device driver * will be notified of PCI bus errors, and will be driven to recovery * when an error occurs. */ -- cgit v1.2.3 From 694625c0b322905d6892fad873029f764cd4823f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Jul 2007 11:55:54 -0700 Subject: PCI: add pci_try_set_mwi As suggested by Andrew, add pci_try_set_mwi(), which does not require return-value checking. - add pci_try_set_mwi() without __must_check - make it return 0 on success, errno if the "try" failed or error - review callers Signed-off-by: Randy Dunlap Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index a6657b7f245d..a5602e26f4dd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -545,6 +545,7 @@ void pci_set_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); +int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); void pci_msi_off(struct pci_dev *dev); -- cgit v1.2.3 From cfc94cdf8e0f14e692a5a40ef3cc10f464b2511b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 9 May 2007 13:19:52 +0200 Subject: debugfs: add rename for debugfs files Implement debugfs_rename() to allow renaming files/directories in debugfs. Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 5a9c49534d08..104e51e20e14 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -38,6 +38,9 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, void debugfs_remove(struct dentry *dentry); +struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, const char *new_name); + struct dentry *debugfs_create_u8(const char *name, mode_t mode, struct dentry *parent, u8 *value); struct dentry *debugfs_create_u16(const char *name, mode_t mode, @@ -85,6 +88,12 @@ static inline struct dentry *debugfs_create_symlink(const char *name, static inline void debugfs_remove(struct dentry *dentry) { } +static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, char *new_name) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_u8(const char *name, mode_t mode, struct dentry *parent, u8 *value) -- cgit v1.2.3 From 4f5c791a850e5305a5b1b48d0e4b4de248dc96f9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 8 May 2007 22:07:02 +0200 Subject: DMI-based module autoloading The patch below adds DMI/SMBIOS based module autoloading to the Linux kernel. The idea is to load laptop drivers automatically (and other drivers which cannot be autoloaded otherwise), based on the DMI system identification information of the BIOS. Right now most distros manually try to load all available laptop drivers on bootup in the hope that at least one of them loads successfully. This patch does away with all that, and uses udev to automatically load matching drivers on the right machines. Basically the patch just exports the DMI information that has been parsed by the kernel anyway to userspace via a sysfs device /sys/class/dmi/id and makes sure that proper modalias attributes are available. Besides adding the "modalias" attribute it also adds attributes for a few other DMI fields which might be useful for writing udev rules. This patch is not an attempt to export the entire DMI/SMBIOS data to userspace. We already have "dmidecode" which parses the complete DMI info from userspace. The purpose of this patch is machine model identification and good udev integration. To take advantage of DMI based module autoloading, a driver should export one or more MODULE_ALIAS fields similar to these: MODULE_ALIAS("dmi:*:svnMICRO-STARINT'LCO.,LTD:pnMS-1013:pvr0131*:cvnMICRO-STARINT'LCO.,LTD:ct10:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational:pnMS-1058:pvr0581:rvnMSI:rnMS-1058:*:ct10:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational:pnMS-1412:*:rvnMSI:rnMS-1412:*:cvnMICRO-STARINT'LCO.,LTD:ct10:*"); MODULE_ALIAS("dmi:*:svnNOTEBOOK:pnSAM2000:pvr0131*:cvnMICRO-STARINT'LCO.,LTD:ct10:*"); These lines are specific to my msi-laptop.c driver. They are basically just a concatenation of a few carefully selected DMI fields with all potentially bad characters stripped. Besides laptop drivers, modules like "hdaps", the i2c modules and the hwmon modules are good candidates for "dmi:" MODULE_ALIAS lines. Besides merely exporting the DMI data via sysfs the patch adds support for a few more DMI fields. Especially the CHASSIS fields are very useful to identify different laptop modules. The patch also adds working MODULE_ALIAS lines to my msi-laptop.c driver. I'd like to thank Kay Sievers for helping me to clean up this patch for posting it on lkml. Patch is against Linus' current GIT HEAD. Should probably apply to older kernels as well without modification. Signed-off-by: Lennart Poettering Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/dmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 904bf3d2d90b..b8ac7b01c45e 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -12,9 +12,17 @@ enum dmi_field { DMI_PRODUCT_NAME, DMI_PRODUCT_VERSION, DMI_PRODUCT_SERIAL, + DMI_PRODUCT_UUID, DMI_BOARD_VENDOR, DMI_BOARD_NAME, DMI_BOARD_VERSION, + DMI_BOARD_SERIAL, + DMI_BOARD_ASSET_TAG, + DMI_CHASSIS_VENDOR, + DMI_CHASSIS_TYPE, + DMI_CHASSIS_VERSION, + DMI_CHASSIS_SERIAL, + DMI_CHASSIS_ASSET_TAG, DMI_STRING_MAX, }; -- cgit v1.2.3 From 9cddad77574313fcee36c5e60122718daa7c0361 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Jun 2007 15:53:34 +0200 Subject: PM: Remove pm_parent from struct dev_pm_info The pm_parent member of struct dev_pm_info (defined in include/linux/pm.h) is only used to check if the device's parent is in the right state while the device is being suspended or resumed. However, this can be done just as well with the help of the parent pointer in struct device, so pm_parent can be removed along with some code that handles it. Signed-off-by: Rafael J. Wysocki Acked-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index b2c4fde4e994..3fd65ad4b097 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -269,13 +269,10 @@ struct dev_pm_info { unsigned should_wakeup:1; pm_message_t prev_state; void * saved_state; - struct device * pm_parent; struct list_head entry; #endif }; -extern void device_pm_set_parent(struct device * dev, struct device * parent); - extern int device_power_down(pm_message_t state); extern void device_power_up(void); extern void device_resume(void); -- cgit v1.2.3 From cc4900690bf77257996e90f0059eb074b8db52e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Jun 2007 15:55:34 +0200 Subject: PM: Remove saved_state from struct dev_pm_info The saved_state member of struct dev_pm_info, defined in include/linux/pm.h, is not used anywhere, so it can be removed. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 3fd65ad4b097..6e7f06671683 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -268,7 +268,6 @@ struct dev_pm_info { #ifdef CONFIG_PM unsigned should_wakeup:1; pm_message_t prev_state; - void * saved_state; struct list_head entry; #endif }; -- cgit v1.2.3 From 515c53576299e32d6bdb6295cfa2fe1307516eb4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 17 Jun 2007 19:48:06 +0200 Subject: PM: Remove prev_state from struct dev_pm_info The prev_state member of struct dev_pm_info (defined in include/linux/pm.h) is only used during a resume to check if the device's state before the suspend was 'off', in which case the device is not resumed. However, in such cases the decision whether or not to resume the device should be made on the driver level and the resume callbacks from the device's bus and class should be executed anyway (the may be needed for some things other than just powering on the device). Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 6e7f06671683..273781c82e4d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -267,7 +267,6 @@ struct dev_pm_info { unsigned can_wakeup:1; #ifdef CONFIG_PM unsigned should_wakeup:1; - pm_message_t prev_state; struct list_head entry; #endif }; -- cgit v1.2.3 From 72dba584b695d8bc8c1a50ed54ad4cba7c62314d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 03:45:13 +0900 Subject: ida: implement idr based id allocator Implement idr based id allocator. ida is used the same way idr is used but lacks id -> ptr translation and thus consumes much less memory. struct ida_bitmap is attached as leaf nodes to idr tree which is managed by the idr code. Each ida_bitmap is 128bytes long and contains slightly less than a thousand slots. ida is more aggressive with releasing extra resources acquired using ida_pre_get(). After every successful id allocation, ida frees one reserved idr_layer if possible. Reserved ida_bitmap is not freed automatically but only one ida_bitmap is reserved and it's almost always used right away. Under most circumstances, ida won't hold on to memory for too long which isn't actively used. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/idr.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 826803449db7..915572fa030b 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -83,4 +83,33 @@ void idr_remove(struct idr *idp, int id); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); + +/* + * IDA - IDR based id allocator, use when translation from id to + * pointer isn't necessary. + */ +#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ +#define IDA_BITMAP_LONGS (128 / sizeof(long) - 1) +#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) + +struct ida_bitmap { + long nr_busy; + unsigned long bitmap[IDA_BITMAP_LONGS]; +}; + +struct ida { + struct idr idr; + struct ida_bitmap *free_bitmap; +}; + +#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } +#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) + +int ida_pre_get(struct ida *ida, gfp_t gfp_mask); +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); +int ida_get_new(struct ida *ida, int *p_id); +void ida_remove(struct ida *ida, int id); +void ida_destroy(struct ida *ida); +void ida_init(struct ida *ida); + #endif /* __IDR_H__ */ -- cgit v1.2.3 From 0c096b507f15397da890051ee73de4266d3941fb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 03:45:15 +0900 Subject: sysfs: add sysfs_dirent->s_name Add s_name to sysfs_dirent. This is to further reduce dependency to the associated dentry. Name is copied for directories and symlinks but not for attributes. Where possible, name dereferences are converted to use sd->s_name. sysfs_symlink->link_name and sysfs_get_name() are unused now and removed. This change allows symlink to be implemented using sysfs_dirent tree proper, which is the last remaining dentry-dependent sysfs walk. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 7d5d1ec95c2e..2f86b080b39d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -76,6 +76,7 @@ struct sysfs_ops { #define SYSFS_KOBJ_BIN_ATTR 0x0008 #define SYSFS_KOBJ_LINK 0x0020 #define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) +#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 7b595756ec1f49e0049a9e01a1298d53a7faaa15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 03:45:17 +0900 Subject: sysfs: kill unnecessary attribute->owner sysfs is now completely out of driver/module lifetime game. After deletion, a sysfs node doesn't access anything outside sysfs proper, so there's no reason to hold onto the attribute owners. Note that often the wrong modules were accounted for as owners leading to accessing removed modules. This patch kills now unnecessary attribute->owner. Note that with this change, userland holding a sysfs node does not prevent the backing module from being unloaded. For more info regarding lifetime rule cleanup, please read the following message. http://article.gmane.org/gmane.linux.kernel/510293 (tweaked by Greg to not delete the field just yet, to make it easier to merge things properly.) Signed-off-by: Tejun Heo Cc: Cornelia Huck Cc: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/sysdev.h | 3 +-- include/linux/sysfs.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index e699ab279c2c..e285746588d6 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -101,8 +101,7 @@ struct sysdev_attribute { #define _SYSDEV_ATTR(_name,_mode,_show,_store) \ { \ - .attr = { .name = __stringify(_name), .mode = _mode, \ - .owner = THIS_MODULE }, \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2f86b080b39d..161e19aa2b4f 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -20,9 +20,13 @@ struct module; struct nameidata; struct dentry; +/* FIXME + * The *owner field is no longer used, but leave around + * until the tree gets cleaned up fully. + */ struct attribute { const char * name; - struct module * owner; + struct module * owner; mode_t mode; }; @@ -39,14 +43,14 @@ struct attribute_group { */ #define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ } #define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ - .show = _name##_show, \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_show, \ } #define __ATTR_NULL { .attr = { .name = NULL } } -- cgit v1.2.3 From ad6a1e1c66009ba9dcd2f5c90ffa1fb4ce72fce0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 03:45:17 +0900 Subject: driver-core: make devt_attr and uevent_attr static devt_attr and uevent_attr are either allocated dynamically with or embedded in device and class_device as they needed their owner field set to the module implementing the driver. Now that sysfs implements immediate disconnect and owner field removed from struct attribute, there is no reason to do this. Remove these attributes from [class_]device and use static attribute structures instead. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 2e1a2988b7e1..be2debed70d2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -238,7 +238,6 @@ extern int __must_check class_device_create_file(struct class_device *, * @devt: for internal use by the driver core only. * @node: for internal use by the driver core only. * @kobj: for internal use by the driver core only. - * @devt_attr: for internal use by the driver core only. * @groups: optional additional groups to be created * @dev: if set, a symlink to the struct device is created in the sysfs * directory for this struct class device. @@ -263,8 +262,6 @@ struct class_device { struct kobject kobj; struct class * class; /* required */ dev_t devt; /* dev_t, creates the sysfs "dev" */ - struct class_device_attribute *devt_attr; - struct class_device_attribute uevent_attr; struct device * dev; /* not necessary, but nice to have */ void * class_data; /* class-specific data */ struct class_device *parent; /* parent of this child device, if there is one */ @@ -419,8 +416,6 @@ struct device { struct device_type *type; unsigned is_registered:1; unsigned uevent_suppress:1; - struct device_attribute uevent_attr; - struct device_attribute *devt_attr; struct semaphore sem; /* semaphore to synchronize calls to * its driver. -- cgit v1.2.3 From b402d72cf7b338a074e3c12b305ec79284e18845 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 04:27:21 +0900 Subject: sysfs: rename sysfs_dirent->s_type to s_flags and make room for flags Rename sysfs_dirent->s_type to s_flags, pack type into lower eight bits and reserve the rest for flags. sysfs_type() can used to access the type. All existing sd->s_type accesses are converted to use sysfs_type(). While at it, type test is changed to equality test instead of bit-and test where appropriate. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 161e19aa2b4f..58135509023e 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -74,6 +74,7 @@ struct sysfs_ops { ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); }; +#define SYSFS_TYPE_MASK 0x00ff #define SYSFS_ROOT 0x0001 #define SYSFS_DIR 0x0002 #define SYSFS_KOBJ_ATTR 0x0004 @@ -82,6 +83,8 @@ struct sysfs_ops { #define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) +#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK + #ifdef CONFIG_SYSFS extern int sysfs_schedule_callback(struct kobject *kobj, -- cgit v1.2.3 From 380e6fbb729a55b73d5d8409551474884e0d93fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 04:27:22 +0900 Subject: sysfs: implement SYSFS_FLAG_REMOVED flag Implement SYSFS_FLAG_REMOVED flag which currently is used only to improve sanity check in sysfs_deactivate(). The flag will be used to make directory entries reclamiable. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 58135509023e..2a6df6444e69 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -84,6 +84,7 @@ struct sysfs_ops { #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK +#define SYSFS_FLAG_REMOVED 0x0100 #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 608e266a2d4e62c1b98c1c573064b6afe8c06a58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 04:27:22 +0900 Subject: sysfs: make kobj point to sysfs_dirent instead of dentry As kobj sysfs dentries and inodes are gonna be made reclaimable, dentry can't be used as naming token for sysfs file/directory, replace kobj->dentry with kobj->sd. The only external interface change is shadow directory handling. All other changes are contained in kobj and sysfs. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 9 +++++---- include/linux/sysfs.h | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c288e41ba331..06cbf41d32d2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -55,7 +55,7 @@ struct kobject { struct kobject * parent; struct kset * kset; struct kobj_type * ktype; - struct dentry * dentry; + struct sysfs_dirent * sd; wait_queue_head_t poll; }; @@ -71,13 +71,14 @@ extern void kobject_init(struct kobject *); extern void kobject_cleanup(struct kobject *); extern int __must_check kobject_add(struct kobject *); -extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *); +extern int __must_check kobject_shadow_add(struct kobject *kobj, + struct sysfs_dirent *shadow_parent); extern void kobject_del(struct kobject *); extern int __must_check kobject_rename(struct kobject *, const char *new_name); extern int __must_check kobject_shadow_rename(struct kobject *kobj, - struct dentry *new_parent, - const char *new_name); + struct sysfs_dirent *new_parent, + const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern int __must_check kobject_register(struct kobject *); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2a6df6444e69..4c43030fae5d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -19,6 +19,7 @@ struct kobject; struct module; struct nameidata; struct dentry; +struct sysfs_dirent; /* FIXME * The *owner field is no longer used, but leave around @@ -92,13 +93,14 @@ extern int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), void *data, struct module *owner); extern int __must_check -sysfs_create_dir(struct kobject *, struct dentry *); +sysfs_create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent_sd); extern void sysfs_remove_dir(struct kobject *); extern int __must_check -sysfs_rename_dir(struct kobject *, struct dentry *, const char *new_name); +sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, + const char *new_name); extern int __must_check sysfs_move_dir(struct kobject *, struct kobject *); @@ -138,8 +140,8 @@ void sysfs_notify(struct kobject * k, char *dir, char *attr); extern int sysfs_make_shadowed_dir(struct kobject *kobj, void * (*follow_link)(struct dentry *, struct nameidata *)); -extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj); -extern void sysfs_remove_shadow_dir(struct dentry *dir); +extern struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj); +extern void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd); extern int __must_check sysfs_init(void); @@ -151,7 +153,8 @@ static inline int sysfs_schedule_callback(struct kobject *kobj, return -ENOSYS; } -static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow) +static inline int sysfs_create_dir(struct kobject *kobj, + struct sysfs_dirent *shadow_parent_sd) { return 0; } @@ -161,9 +164,9 @@ static inline void sysfs_remove_dir(struct kobject * k) ; } -static inline int sysfs_rename_dir(struct kobject * k, - struct dentry *new_parent, - const char *new_name) +static inline int sysfs_rename_dir(struct kobject *kobj, + struct sysfs_dirent *new_parent_sd, + const char *new_name) { return 0; } -- cgit v1.2.3 From 51225039f3cf9d250596d1344494b293274b9169 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jun 2007 04:27:25 +0900 Subject: sysfs: make directory dentries and inodes reclaimable This patch makes dentries and inodes for sysfs directories reclaimable. * sysfs_notify() is modified to walk sysfs_dirent tree instead of dentry tree. * sysfs_update_file() and sysfs_chmod_file() use sysfs_get_dentry() to grab the victim dentry. * sysfs_rename_dir() and sysfs_move_dir() grab all dentries using sysfs_get_dentry() on startup. * Dentries for all shadowed directories are pinned in memory to serve as lookup start point. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 4c43030fae5d..2f58ca1af770 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -81,7 +81,6 @@ struct sysfs_ops { #define SYSFS_KOBJ_ATTR 0x0004 #define SYSFS_KOBJ_BIN_ATTR 0x0008 #define SYSFS_KOBJ_LINK 0x0020 -#define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK -- cgit v1.2.3 From 91a6902958f052358899f58683d44e36228d85c2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 9 Jun 2007 13:57:22 +0800 Subject: sysfs: add parameter "struct bin_attribute *" in .read/.write methods for sysfs binary attributes Well, first of all, I don't want to change so many files either. What I do: Adding a new parameter "struct bin_attribute *" in the .read/.write methods for the sysfs binary attributes. In fact, only the four lines change in fs/sysfs/bin.c and include/linux/sysfs.h do the real work. But I have to update all the files that use binary attributes to make them compatible with the new .read and .write methods. I'm not sure if I missed any. :( Why I do this: For a sysfs attribute, we can get a pointer pointing to the struct attribute in the .show/.store method, while we can't do this for the binary attributes. I don't know why this is different, but this does make it not so handy to use the binary attributes as the regular ones. So I think this patch is reasonable. :) Who benefits from it: The patch that exposes ACPI tables in sysfs requires such an improvement. All the table binary attributes share the same .read method. Parameter "struct bin_attribute *" is used to get the table signature and instance number which are used to distinguish different ACPI table binary attributes. Without this parameter, we need to offer different .read methods for different ACPI table binary attributes. This is impossible as there are various ACPI tables on different platforms, and we don't know what they are until they are loaded. Signed-off-by: Zhang Rui Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2f58ca1af770..be8228e50a27 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -64,8 +64,10 @@ struct bin_attribute { struct attribute attr; size_t size; void *private; - ssize_t (*read)(struct kobject *, char *, loff_t, size_t); - ssize_t (*write)(struct kobject *, char *, loff_t, size_t); + ssize_t (*read)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); + ssize_t (*write)(struct kobject *, struct bin_attribute *, + char *, loff_t, size_t); int (*mmap)(struct kobject *, struct bin_attribute *attr, struct vm_area_struct *vma); }; -- cgit v1.2.3 From 29578624e354f56143d92510fff33a8b2aaa2c03 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 11 Jul 2007 19:32:02 -0700 Subject: [NET]: Fix races in net_rx_action vs netpoll. Keep netpoll/poll_napi from messing with the poll_list. Only net_rx_action is allowed to manipulate the list. Signed-off-by: Olaf Kirch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8590d685d935..79cc3dab4be7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -261,6 +261,8 @@ enum netdev_state_t __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, + /* Set by the netpoll NAPI code */ + __LINK_STATE_POLL_LIST_FROZEN, }; @@ -1014,6 +1016,14 @@ static inline void netif_rx_complete(struct net_device *dev) { unsigned long flags; +#ifdef CONFIG_NETPOLL + /* Prevent race with netpoll - yes, this is a kludge. + * But at least it doesn't penalize the non-netpoll + * code path. */ + if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)) + return; +#endif + local_irq_save(flags); __netif_rx_complete(dev); local_irq_restore(flags); -- cgit v1.2.3 From 8c979c26a0f093c13290320edda799d8335e50ae Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:45:24 -0700 Subject: [VLAN]: Fix MAC address handling The VLAN MAC address handling is broken in multiple ways. When the address differs when setting it, the real device is put in promiscous mode twice, but never taken out again. Additionally it doesn't resync when the real device's address is changed and needlessly puts it in promiscous mode when the vlan device is still down. Fix by moving address handling to vlan_dev_open/vlan_dev_stop and properly deal with address changes in the device notifier. Also switch to dev_unicast_add (which needs the exact same handling). Since the set_mac_address handler is identical to the generic ethernet one with these changes, kill it and use ether_setup(). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c7912876a210..61a57dc2ac99 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -135,6 +135,7 @@ struct vlan_dev_info { int old_allmulti; /* similar to above. */ int old_promiscuity; /* similar to above. */ struct net_device *real_dev; /* the underlying device/interface */ + unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ unsigned long cnt_inc_headroom_on_tx; /* How many times did we have to grow the skb on TX. */ unsigned long cnt_encap_on_xmit; /* How many times did we have to encapsulate the skb on TX. */ -- cgit v1.2.3 From db3d99c090e0cdb34b1274767e062bfddbb384bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 11 Jul 2007 19:46:26 -0700 Subject: [NET_SCHED]: ematch: module autoloading Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_cls.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index c3f01b3085a4..30b8571e6b34 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -403,16 +403,13 @@ enum * 1..32767 Reserved for ematches inside kernel tree * 32768..65535 Free to use, not reliable */ -enum -{ - TCF_EM_CONTAINER, - TCF_EM_CMP, - TCF_EM_NBYTE, - TCF_EM_U32, - TCF_EM_META, - TCF_EM_TEXT, - __TCF_EM_MAX -}; +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_MAX 5 enum { -- cgit v1.2.3 From ed0321895182ffb6ecf210e066d87911b270d587 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Jun 2007 15:55:21 -0400 Subject: security: Protection for exploiting null dereference using mmap Add a new security check on mmap operations to see if the user is attempting to mmap to low area of the address space. The amount of space protected is indicated by the new proc tunable /proc/sys/vm/mmap_min_addr and defaults to 0, preserving existing behavior. This patch uses a new SELinux security class "memprotect." Policy already contains a number of allow rules like a_t self:process * (unconfined_t being one of them) which mean that putting this check in the process class (its best current fit) would make it useless as all user processes, which we also want to protect against, would be allowed. By taking the memprotect name of the new class it will also make it possible for us to move some of the other memory protect permissions out of 'process' and into the new class next time we bump the policy version number (which I also think is a good future idea) Acked-by: Stephen Smalley Acked-by: Chris Wright Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 9eb9e0fe0331..c11dc8aa0351 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -71,6 +71,7 @@ struct xfrm_user_sec_ctx; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); +extern unsigned long mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -1241,8 +1242,9 @@ struct security_operations { int (*file_ioctl) (struct file * file, unsigned int cmd, unsigned long arg); int (*file_mmap) (struct file * file, - unsigned long reqprot, - unsigned long prot, unsigned long flags); + unsigned long reqprot, unsigned long prot, + unsigned long flags, unsigned long addr, + unsigned long addr_only); int (*file_mprotect) (struct vm_area_struct * vma, unsigned long reqprot, unsigned long prot); @@ -1814,9 +1816,12 @@ static inline int security_file_ioctl (struct file *file, unsigned int cmd, static inline int security_file_mmap (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { - return security_ops->file_mmap (file, reqprot, prot, flags); + return security_ops->file_mmap (file, reqprot, prot, flags, addr, + addr_only); } static inline int security_file_mprotect (struct vm_area_struct *vma, @@ -2489,7 +2494,9 @@ static inline int security_file_ioctl (struct file *file, unsigned int cmd, static inline int security_file_mmap (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags) + unsigned long flags, + unsigned long addr, + unsigned long addr_only) { return 0; } -- cgit v1.2.3 From d64f73be1b59b9556de0a8fbd4f1a003c6a45a5c Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 12 Jul 2007 14:12:28 +0200 Subject: i2c: Add kernel documentation Generate I2C kerneldoc; fix various glitches and add "context" sections to that documentation. Most I2C and SMBus functions still have no kerneldoc. Let me suggest providing kerneldoc for all the i2c_smbus_*() functions as a small and mostly self-contained project for anyone so inclined. :) Signed-off-by: David Brownell Signed-off-by: Jean Delvare --- include/linux/i2c.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cae7d618030c..a24e267fd189 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -150,15 +150,20 @@ struct i2c_driver { /** * struct i2c_client - represent an I2C slave device + * @flags: I2C_CLIENT_TEN indicates the device uses a ten bit chip address; + * I2C_CLIENT_PEC indicates it uses SMBus Packet Error Checking * @addr: Address used on the I2C bus connected to the parent adapter. * @name: Indicates the type of the device, usually a chip name that's * generic enough to hide second-sourcing and compatible revisions. + * @adapter: manages the bus segment hosting this I2C device * @dev: Driver model device node for the slave. + * @irq: indicates the IRQ generated by this device (if any) * @driver_name: Identifies new-style driver used with this device; also * used as the module name for hotplug/coldplug modprobe support. * * An i2c_client identifies a single device (i.e. chip) connected to an - * i2c bus. The behaviour is defined by the routines of the driver. + * i2c bus. The behaviour exposed to Linux is defined by the driver + * managing the device. */ struct i2c_client { unsigned short flags; /* div., see below */ @@ -201,7 +206,7 @@ static inline void i2c_set_clientdata (struct i2c_client *dev, void *data) * @addr: stored in i2c_client.addr * @platform_data: stored in i2c_client.dev.platform_data * @irq: stored in i2c_client.irq - + * * I2C doesn't actually support hardware probing, although controllers and * devices may be able to use I2C_SMBUS_QUICK to tell whether or not there's * a device at a given address. Drivers commonly need more information than @@ -210,7 +215,7 @@ static inline void i2c_set_clientdata (struct i2c_client *dev, void *data) * i2c_board_info is used to build tables of information listing I2C devices * that are present. This information is used to grow the driver model tree * for "new style" I2C drivers. For mainboards this is done statically using - * i2c_register_board_info(), where @bus_num represents an adapter that isn't + * i2c_register_board_info(); bus numbers identify adapters that aren't * yet available. For add-on boards, i2c_new_device() does this dynamically * with the adapter already known. */ -- cgit v1.2.3 From d75d53cd571c02990d56e72f615ab11e943772f9 Mon Sep 17 00:00:00 2001 From: "Mark M. Hoffman" Date: Thu, 12 Jul 2007 14:12:28 +0200 Subject: i2c: Fix sparse warning in i2c.h Kill a sparse warning by un-nesting two container_of() calls. Signed-off-by: Mark M. Hoffman Signed-off-by: Jean Delvare --- include/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a24e267fd189..44f2ecf47d9f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -185,7 +185,8 @@ struct i2c_client { static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) { - return to_i2c_client(container_of(kobj, struct device, kobj)); + struct device * const dev = container_of(kobj, struct device, kobj); + return to_i2c_client(dev); } static inline void *i2c_get_clientdata (struct i2c_client *dev) -- cgit v1.2.3 From 4b2643d7d9bdcd776749e17f73c168ddf02e93cb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 12 Jul 2007 14:12:29 +0200 Subject: i2c: Fix the i2c_smbus_read_i2c_block_data() prototype Let the drivers specify how many bytes they want to read with i2c_smbus_read_i2c_block_data(). So far, the block count was hard-coded to I2C_SMBUS_BLOCK_MAX (32), which did not make much sense. Many driver authors complained about this before, and I believe it's about time to fix it. Right now, authors have to do technically stupid things, such as individual byte reads or full-fledged I2C messaging, to work around the problem. We do not want to encourage that. I even found that some bus drivers (e.g. i2c-amd8111) already implemented I2C block read the "right" way, that is, they didn't follow the old, broken standard. The fact that it was never noticed before just shows how little i2c_smbus_read_i2c_block_data() was used, which isn't that surprising given how broken its prototype was so far. There are some obvious compatiblity considerations: * This changes the i2c_smbus_read_i2c_block_data() prototype. Users outside the kernel tree will notice at compilation time, and will have to update their code. * User-space has access to i2c_smbus_xfer() directly using i2c-dev, so the changed expectations would affect tools such as i2cdump. In order to preserve binary compatibility, we give I2C_SMBUS_I2C_BLOCK_DATA a new numeric value, and define I2C_SMBUS_I2C_BLOCK_BROKEN with the old numeric value. When i2c-dev receives a transaction with the old value, it can convert it to the new format on the fly. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 44f2ecf47d9f..2eaba21b9b1a 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -90,7 +90,7 @@ extern s32 i2c_smbus_write_block_data(struct i2c_client * client, const u8 *values); /* Returns the number of read bytes */ extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, - u8 command, u8 *values); + u8 command, u8 length, u8 *values); extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, u8 command, u8 length, const u8 *values); @@ -524,8 +524,9 @@ union i2c_smbus_data { #define I2C_SMBUS_WORD_DATA 3 #define I2C_SMBUS_PROC_CALL 4 #define I2C_SMBUS_BLOCK_DATA 5 -#define I2C_SMBUS_I2C_BLOCK_DATA 6 +#define I2C_SMBUS_I2C_BLOCK_BROKEN 6 #define I2C_SMBUS_BLOCK_PROC_CALL 7 /* SMBus 2.0 */ +#define I2C_SMBUS_I2C_BLOCK_DATA 8 /* ----- commands for the ioctl like i2c_command call: -- cgit v1.2.3 From c29c22218b99dad95f7cd0281415a854aeee805c Mon Sep 17 00:00:00 2001 From: Henry Su Date: Thu, 12 Jul 2007 14:12:29 +0200 Subject: i2c-piix4: Add support for the ATI SB700 Add the SMBus device ID for ATI SB700. Signed-off-by: Henry Su Signed-off-by: Jean Delvare --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 75c4d4d06892..8300001e9078 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -371,6 +371,7 @@ #define PCI_DEVICE_ID_ATI_IXP600_SMBUS 0x4385 #define PCI_DEVICE_ID_ATI_IXP600_IDE 0x438c #define PCI_DEVICE_ID_ATI_IXP700_SATA 0x4390 +#define PCI_DEVICE_ID_ATI_IXP700_SMBUS 0x4395 #define PCI_DEVICE_ID_ATI_IXP700_IDE 0x439c #define PCI_VENDOR_ID_VLSI 0x1004 -- cgit v1.2.3 From b9cdad74883a797952de52464d118d685cafc05a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 12 Jul 2007 14:12:31 +0200 Subject: i2c: New bus driver for the TAOS evaluation modules This is a new I2C bus driver for the TAOS evaluation modules. Developped and tested on the TAOS TSL2550 EVM. Signed-off-by: Jean Delvare --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index 1ebf0455e224..d9377ce9ffd1 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -209,5 +209,6 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_PENMOUNT 0x31 #define SERIO_TOUCHRIGHT 0x32 #define SERIO_TOUCHWIN 0x33 +#define SERIO_TAOSEVM 0x34 #endif -- cgit v1.2.3 From e087db510cd96a75a614f6f6fcd5499ab21cb087 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 11 Jul 2007 12:18:31 -0700 Subject: Clean up struct screen_info () struct screen_info has unaligned members, it needs to be packed. In the process, fix the naming of some of the members, which don't belong in this structure but are part of it anyway. Signed-off-by: H. Peter Anvin Signed-off-by: Linus Torvalds --- include/linux/screen_info.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index b02308ee7667..3ee412bc00ec 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -10,7 +10,7 @@ struct screen_info { u8 orig_x; /* 0x00 */ u8 orig_y; /* 0x01 */ - u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */ + u16 ext_mem_k; /* 0x02 */ u16 orig_video_page; /* 0x04 */ u8 orig_video_mode; /* 0x06 */ u8 orig_video_cols; /* 0x07 */ @@ -27,7 +27,7 @@ struct screen_info { u16 lfb_depth; /* 0x16 */ u32 lfb_base; /* 0x18 */ u32 lfb_size; /* 0x1c */ - u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ + u16 cl_magic, cl_offset; /* 0x20 */ u16 lfb_linelength; /* 0x24 */ u8 red_size; /* 0x26 */ u8 red_pos; /* 0x27 */ @@ -42,9 +42,8 @@ struct screen_info { u16 pages; /* 0x32 */ u16 vesa_attributes; /* 0x34 */ u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3b reserved for future expansion */ - /* 0x3c -- 0x3f micro stack for relocatable kernels */ -}; + u8 _reserved[6]; /* 0x3a */ +} __attribute__((packed)); extern struct screen_info screen_info; -- cgit v1.2.3 From c39736823232bc3ca113c8228fa852c09fba300e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 11 Jul 2007 12:18:58 -0700 Subject: Remove old i386 setup code This removes the old i386 setup code. This is done as a separate patch to avoid breaking git bisect as some of the i386 code was also used by the old x86-64 code. Signed-off-by: H. Peter Anvin Signed-off-by: Linus Torvalds --- include/linux/edd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edd.h b/include/linux/edd.h index b2b3e68aa512..7b647822d6dc 100644 --- a/include/linux/edd.h +++ b/include/linux/edd.h @@ -49,10 +49,6 @@ #define EDD_MBR_SIG_MAX 16 /* max number of signatures to store */ #define EDD_MBR_SIG_NR_BUF 0x1ea /* addr of number of MBR signtaures at EDD_MBR_SIG_BUF in boot_params - treat this as 1 byte */ -#define EDD_CL_EQUALS 0x3d646465 /* "edd=" */ -#define EDD_CL_OFF 0x666f /* "of" for off */ -#define EDD_CL_SKIP 0x6b73 /* "sk" for skipmbr */ -#define EDD_CL_ON 0x6e6f /* "on" for on */ #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 5628221caf88e2a052782b042e12da7cd34111b0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: ERP IE handling improvements The "protection needed" flag is currently parsed out of the ERP IE in beacons. This patch allows the ERP IE to be available at assocation time and causes the appropriate actions to be performed earlier. It is slightly complicated by the fact that most APs don't include the ERP IE in association responses. To work around this, we store ERP values in the ieee80211_sta_bss structure. Also added some WLAN_ERP defines for use by upcoming patches. Signed-off-by: Jiri Benc Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ecd61e8438a5..272f8c8c90da 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -227,6 +227,17 @@ struct ieee80211_cts { #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) +/* 802.11g ERP information element */ +#define WLAN_ERP_NON_ERP_PRESENT (1<<0) +#define WLAN_ERP_USE_PROTECTION (1<<1) +#define WLAN_ERP_BARKER_PREAMBLE (1<<2) + +/* WLAN_ERP_BARKER_PREAMBLE values */ +enum { + WLAN_ERP_PREAMBLE_SHORT = 0, + WLAN_ERP_PREAMBLE_LONG = 1, +}; + /* Status codes */ enum ieee80211_statuscode { WLAN_STATUS_SUCCESS = 0, -- cgit v1.2.3 From 6a775e2ba4f7635849ade628e64723ab2beef0bc Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 21 Jun 2007 12:27:47 +0300 Subject: IB/mlx4: Implement query QP Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/qp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 10c57d279144..3968b943259a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -282,6 +282,9 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp); +int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, + struct mlx4_qp_context *context); + static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); -- cgit v1.2.3 From 65541cb7cf353946ecd78016a453b453b8830656 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 21 Jun 2007 13:03:11 +0300 Subject: IB/mlx4: Implement query SRQ Signed-off-by: Dotan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8209387ee854..cfb78fb2c046 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -323,6 +323,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); +int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); -- cgit v1.2.3 From ec22559e0b7a05283a3413bda5d177e42c950e23 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 27 Apr 2007 20:54:57 +0200 Subject: USB: suspend support for usb serial this implements generic support for suspend/resume for usb serial. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 32acbae28d24..e8b8928232c8 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -221,6 +221,9 @@ struct usb_serial_driver { int (*port_probe) (struct usb_serial_port *port); int (*port_remove) (struct usb_serial_port *port); + int (*suspend) (struct usb_serial *serial, pm_message_t message); + int (*resume) (struct usb_serial *serial); + /* serial function calls */ int (*open) (struct usb_serial_port *port, struct file * filp); void (*close) (struct usb_serial_port *port, struct file * filp); @@ -249,6 +252,9 @@ extern void usb_serial_port_softint(struct usb_serial_port *port); extern int usb_serial_probe(struct usb_interface *iface, const struct usb_device_id *id); extern void usb_serial_disconnect(struct usb_interface *iface); +extern int usb_serial_suspend(struct usb_interface *intf, pm_message_t message); +extern int usb_serial_resume(struct usb_interface *intf); + extern int ezusb_writememory (struct usb_serial *serial, int address, unsigned char *data, int length, __u8 bRequest); extern int ezusb_set_reset (struct usb_serial *serial, unsigned char reset_bit); @@ -269,6 +275,7 @@ extern void usb_serial_put(struct usb_serial *serial); extern int usb_serial_generic_open (struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write (struct usb_serial_port *port, const unsigned char *buf, int count); extern void usb_serial_generic_close (struct usb_serial_port *port, struct file *filp); +extern int usb_serial_generic_resume (struct usb_serial *serial); extern int usb_serial_generic_write_room (struct usb_serial_port *port); extern int usb_serial_generic_chars_in_buffer (struct usb_serial_port *port); extern void usb_serial_generic_read_bulk_callback (struct urb *urb); -- cgit v1.2.3 From 0458d5b4c9cc4ca0f62625d0144ddc4b4bc97a3c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 4 May 2007 11:52:20 -0400 Subject: USB: add USB-Persist facility This patch (as886) adds the controversial USB-persist facility, allowing USB devices to persist across a power loss during system suspend. The facility is controlled by a new Kconfig option (with appropriate warnings about the potential dangers); when the option is off the behavior will remain the same as it is now. But when the option is on, people will be able to use suspend-to-disk and keep their USB filesystems intact -- something particularly valuable for small machines where the root filesystem is on a USB device! Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 56aa2ee21f1b..3d63e0c2dd70 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -403,6 +403,7 @@ struct usb_device { unsigned auto_pm:1; /* autosuspend/resume in progress */ unsigned do_remote_wakeup:1; /* remote wakeup should be enabled */ + unsigned reset_resume:1; /* needs reset instead of resume */ unsigned autosuspend_disabled:1; /* autosuspend and autoresume */ unsigned autoresume_disabled:1; /* disabled by the user */ #endif @@ -819,7 +820,10 @@ struct usbdrv_wrap { * @pre_reset: Called by usb_reset_composite_device() when the device * is about to be reset. * @post_reset: Called by usb_reset_composite_device() after the device - * has been reset. + * has been reset, or in lieu of @resume following a reset-resume + * (i.e., the device is reset instead of being resumed, as might + * happen if power was lost). The second argument tells which is + * the reason. * @id_table: USB drivers use ID table to support hotplugging. * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set * or your driver's probe function will never get called. @@ -861,7 +865,7 @@ struct usb_driver { int (*resume) (struct usb_interface *intf); void (*pre_reset) (struct usb_interface *intf); - void (*post_reset) (struct usb_interface *intf); + void (*post_reset) (struct usb_interface *intf, int reset_resume); const struct usb_device_id *id_table; -- cgit v1.2.3 From 6bc6cff52e0c4c4c876b1b8a5750041da61ad42b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 4 May 2007 11:53:03 -0400 Subject: USB: add RESET_RESUME device quirk This patch (as888) adds a new USB device quirk for devices which are unable to resume correctly. By using the new code added for the USB-persist facility, it is a simple matter to reset these devices instead of resuming them. To get things kicked off, a quirk entry is added for the Philips PSC805. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 6bac8faacbc6..8da374caf582 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -9,3 +9,6 @@ /* string descriptors must not be fetched using a 255-byte read */ #define USB_QUIRK_STRING_FETCH_255 0x00000002 + +/* device can't resume correctly so reset it instead */ +#define USB_QUIRK_RESET_RESUME 0x00000004 -- cgit v1.2.3 From 8538f96ae5aada1c04d69a993b20ad160b191d47 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 10 May 2007 00:32:24 +0100 Subject: USB: add USB_DEVICE_AND_INTERFACE_INFO for device matching Recently, the USB device matching code stopped matching generic interface matches against devices with vendor-specific device class values. Some drivers now need to explicitly match USB device ID's (in addition to generic interface info) to retain the same behaviour as before. This new macro, suggested by Alan Stern, makes the explicit device/interface matching a little simpler for those users. Signed-off-by: Daniel Drake Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 3d63e0c2dd70..98e0338664fb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -771,6 +771,28 @@ static inline int usb_endpoint_is_isoc_out(const struct usb_endpoint_descriptor .match_flags = USB_DEVICE_ID_MATCH_INT_INFO, .bInterfaceClass = (cl), \ .bInterfaceSubClass = (sc), .bInterfaceProtocol = (pr) +/** + * USB_DEVICE_AND_INTERFACE_INFO - macro used to describe a specific usb device + * with a class of usb interfaces + * @vend: the 16 bit USB Vendor ID + * @prod: the 16 bit USB Product ID + * @cl: bInterfaceClass value + * @sc: bInterfaceSubClass value + * @pr: bInterfaceProtocol value + * + * This macro is used to create a struct usb_device_id that matches a + * specific device with a specific class of interfaces. + * + * This is especially useful when explicitly matching devices that have + * vendor specific bDeviceClass values, but standards-compliant interfaces. + */ +#define USB_DEVICE_AND_INTERFACE_INFO(vend,prod,cl,sc,pr) \ + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO \ + | USB_DEVICE_ID_MATCH_DEVICE, \ + .idVendor = (vend), .idProduct = (prod), \ + .bInterfaceClass = (cl), \ + .bInterfaceSubClass = (sc), .bInterfaceProtocol = (pr) + /* ----------------------------------------------------------------------- */ /* Stuff for dynamic usb ids */ -- cgit v1.2.3 From a5262dcfda9163ca1f8a64349a6f7ba640ac1dc2 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Mon, 14 May 2007 19:36:41 -0700 Subject: USB: export as Make sure gadgetfs userspace interface is properly exported: - Move to ; - Export it using Kbuild; - Add an #include guard; - Correct some internal documentation; - Update struct layout so it's the same on 32/64 bit kernels. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/Kbuild | 1 + include/linux/usb/gadgetfs.h | 81 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb_gadgetfs.h | 75 ---------------------------------------- 3 files changed, 82 insertions(+), 75 deletions(-) create mode 100644 include/linux/usb/gadgetfs.h delete mode 100644 include/linux/usb_gadgetfs.h (limited to 'include/linux') diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild index 43f160cfe003..6ce42bf9f743 100644 --- a/include/linux/usb/Kbuild +++ b/include/linux/usb/Kbuild @@ -1,5 +1,6 @@ unifdef-y += audio.h unifdef-y += cdc.h unifdef-y += ch9.h +unifdef-y += gadgetfs.h unifdef-y += midi.h diff --git a/include/linux/usb/gadgetfs.h b/include/linux/usb/gadgetfs.h new file mode 100644 index 000000000000..e8654c338729 --- /dev/null +++ b/include/linux/usb/gadgetfs.h @@ -0,0 +1,81 @@ +#ifndef __LINUX_USB_GADGETFS_H +#define __LINUX_USB_GADGETFS_H + +#include +#include + +#include + +/* + * Filesystem based user-mode API to USB Gadget controller hardware + * + * Other than ep0 operations, most things are done by read() and write() + * on endpoint files found in one directory. They are configured by + * writing descriptors, and then may be used for normal stream style + * i/o requests. When ep0 is configured, the device can enumerate; + * when it's closed, the device disconnects from usb. Operations on + * ep0 require ioctl() operations. + * + * Configuration and device descriptors get written to /dev/gadget/$CHIP, + * which may then be used to read usb_gadgetfs_event structs. The driver + * may activate endpoints as it handles SET_CONFIGURATION setup events, + * or earlier; writing endpoint descriptors to /dev/gadget/$ENDPOINT + * then performing data transfers by reading or writing. + */ + +/* + * Events are delivered on the ep0 file descriptor, when the user mode driver + * reads from this file descriptor after writing the descriptors. Don't + * stop polling this descriptor. + */ + +enum usb_gadgetfs_event_type { + GADGETFS_NOP = 0, + + GADGETFS_CONNECT, + GADGETFS_DISCONNECT, + GADGETFS_SETUP, + GADGETFS_SUSPEND, + // and likely more ! +}; + +/* NOTE: this structure must stay the same size and layout on + * both 32-bit and 64-bit kernels. + */ +struct usb_gadgetfs_event { + union { + // NOP, DISCONNECT, SUSPEND: nothing + // ... some hardware can't report disconnection + + // CONNECT: just the speed + enum usb_device_speed speed; + + // SETUP: packet; DATA phase i/o precedes next event + // (setup.bmRequestType & USB_DIR_IN) flags direction + // ... includes SET_CONFIGURATION, SET_INTERFACE + struct usb_ctrlrequest setup; + } u; + enum usb_gadgetfs_event_type type; +}; + + +/* endpoint ioctls */ + +/* IN transfers may be reported to the gadget driver as complete + * when the fifo is loaded, before the host reads the data; + * OUT transfers may be reported to the host's "client" driver as + * complete when they're sitting in the FIFO unread. + * THIS returns how many bytes are "unclaimed" in the endpoint fifo + * (needed for precise fault handling, when the hardware allows it) + */ +#define GADGETFS_FIFO_STATUS _IO('g',1) + +/* discards any unclaimed data in the fifo. */ +#define GADGETFS_FIFO_FLUSH _IO('g',2) + +/* resets endpoint halt+toggle; used to implement set_interface. + * some hardware (like pxa2xx) can't support this. + */ +#define GADGETFS_CLEAR_HALT _IO('g',3) + +#endif /* __LINUX_USB_GADGETFS_H */ diff --git a/include/linux/usb_gadgetfs.h b/include/linux/usb_gadgetfs.h deleted file mode 100644 index 8086d5a9b94e..000000000000 --- a/include/linux/usb_gadgetfs.h +++ /dev/null @@ -1,75 +0,0 @@ - -#include -#include - -#include - -/* - * Filesystem based user-mode API to USB Gadget controller hardware - * - * Almost everything can be done with only read and write operations, - * on endpoint files found in one directory. They are configured by - * writing descriptors, and then may be used for normal stream style - * i/o requests. When ep0 is configured, the device can enumerate; - * when it's closed, the device disconnects from usb. - * - * Configuration and device descriptors get written to /dev/gadget/$CHIP, - * which may then be used to read usb_gadgetfs_event structs. The driver - * may activate endpoints as it handles SET_CONFIGURATION setup events, - * or earlier; writing endpoint descriptors to /dev/gadget/$ENDPOINT - * then performing data transfers by reading or writing. - */ - -/* - * Events are delivered on the ep0 file descriptor, if the user mode driver - * reads from this file descriptor after writing the descriptors. Don't - * stop polling this descriptor, if you write that kind of driver. - */ - -enum usb_gadgetfs_event_type { - GADGETFS_NOP = 0, - - GADGETFS_CONNECT, - GADGETFS_DISCONNECT, - GADGETFS_SETUP, - GADGETFS_SUSPEND, - // and likely more ! -}; - -struct usb_gadgetfs_event { - enum usb_gadgetfs_event_type type; - union { - // NOP, DISCONNECT, SUSPEND: nothing - // ... some hardware can't report disconnection - - // CONNECT: just the speed - enum usb_device_speed speed; - - // SETUP: packet; DATA phase i/o precedes next event - // (setup.bmRequestType & USB_DIR_IN) flags direction - // ... includes SET_CONFIGURATION, SET_INTERFACE - struct usb_ctrlrequest setup; - } u; -}; - - -/* endpoint ioctls */ - -/* IN transfers may be reported to the gadget driver as complete - * when the fifo is loaded, before the host reads the data; - * OUT transfers may be reported to the host's "client" driver as - * complete when they're sitting in the FIFO unread. - * THIS returns how many bytes are "unclaimed" in the endpoint fifo - * (needed for precise fault handling, when the hardware allows it) - */ -#define GADGETFS_FIFO_STATUS _IO('g',1) - -/* discards any unclaimed data in the fifo. */ -#define GADGETFS_FIFO_FLUSH _IO('g',2) - -/* resets endpoint halt+toggle; used to implement set_interface. - * some hardware (like pxa2xx) can't support this. - */ -#define GADGETFS_CLEAR_HALT _IO('g',3) - - -- cgit v1.2.3 From 51a2f077c44e559841b09de6da605b4d3ae40dad Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 25 May 2007 13:40:56 +0200 Subject: USB: introduce usb_anchor - introduction of usb_anchor and its methods Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 98e0338664fb..0873c6219efc 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1000,11 +1000,26 @@ struct usb_iso_packet_descriptor { struct urb; +struct usb_anchor { + struct list_head urb_list; + wait_queue_head_t wait; + spinlock_t lock; +}; + +static inline void init_usb_anchor(struct usb_anchor *anchor) +{ + INIT_LIST_HEAD(&anchor->urb_list); + init_waitqueue_head(&anchor->wait); + spin_lock_init(&anchor->lock); +} + typedef void (*usb_complete_t)(struct urb *); /** * struct urb - USB Request Block * @urb_list: For use by current owner of the URB. + * @anchor_list: membership in the list of an anchor + * @anchor: to anchor URBs to a common mooring * @pipe: Holds endpoint number, direction, type, and more. * Create these values with the eight macros available; * usb_{snd,rcv}TYPEpipe(dev,endpoint), where the TYPE is "ctrl" @@ -1177,6 +1192,8 @@ struct urb /* public: documented fields in the urb that can be used by drivers */ struct list_head urb_list; /* list head for use by the urb's * current owner */ + struct list_head anchor_list; /* the URB may be anchored by the driver */ + struct usb_anchor *anchor; struct usb_device *dev; /* (in) pointer to associated device */ unsigned int pipe; /* (in) pipe information */ int status; /* (return) non-ISO status */ @@ -1312,6 +1329,11 @@ extern struct urb *usb_get_urb(struct urb *urb); extern int usb_submit_urb(struct urb *urb, gfp_t mem_flags); extern int usb_unlink_urb(struct urb *urb); extern void usb_kill_urb(struct urb *urb); +extern void usb_kill_anchored_urbs(struct usb_anchor *anchor); +extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor); +extern void usb_unanchor_urb(struct urb *urb); +extern int usb_wait_anchor_empty_timeout(struct usb_anchor *anchor, + unsigned int timeout); void *usb_buffer_alloc (struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); -- cgit v1.2.3 From f07600cf9eb3ee92777b2001e564faa413144a99 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 May 2007 15:38:16 -0400 Subject: USB: add reset_resume method This patch (as918) introduces a new USB driver method: reset_resume. It is called when a device needs to be reset as part of a resume procedure (whether because of a device quirk or because of the USB-Persist facility), thereby taking over a role formerly assigned to the post_reset method. As a consequence, post_reset no longer needs an argument indicating whether it is being called as part of a reset-resume. This separation of functions makes the code clearer. In addition, the pre_reset and post_reset method return types are changed; they now must return an error code. The return value is unused at present, but at some later time we may unbind drivers and re-probe if they encounter an error during reset handling. The existing pre_reset and post_reset methods in the usbhid, usb-storage, and hub drivers are updated to match the new requirements. For usbhid the post_reset routine is also used for reset_resume (duplicate method pointers); for the other drivers a new reset_resume routine is added. The change to hub.c looks bigger than it really is, because mark_children_for_reset_resume() gets moved down next to the new hub_reset_resume() routine. A minor change to usb-storage makes the usb_stor_report_bus_reset() routine acquire the host lock instead of requiring the caller to hold it already. Signed-off-by: Alan Stern Signed-off-by: Jiri Kosina CC: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 0873c6219efc..bde8c65e2bfc 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -839,6 +839,8 @@ struct usbdrv_wrap { * do (or don't) show up otherwise in the filesystem. * @suspend: Called when the device is going to be suspended by the system. * @resume: Called when the device is being resumed by the system. + * @reset_resume: Called when the suspended device has been reset instead + * of being resumed. * @pre_reset: Called by usb_reset_composite_device() when the device * is about to be reset. * @post_reset: Called by usb_reset_composite_device() after the device @@ -885,9 +887,10 @@ struct usb_driver { int (*suspend) (struct usb_interface *intf, pm_message_t message); int (*resume) (struct usb_interface *intf); + int (*reset_resume)(struct usb_interface *intf); - void (*pre_reset) (struct usb_interface *intf); - void (*post_reset) (struct usb_interface *intf, int reset_resume); + int (*pre_reset)(struct usb_interface *intf); + int (*post_reset)(struct usb_interface *intf); const struct usb_device_id *id_table; -- cgit v1.2.3 From b41a60eca833d76593d4dac8a59f5c38714194ee Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 May 2007 15:39:33 -0400 Subject: USB: add power/persist device attribute This patch (as920) adds an extra level of protection to the USB-Persist facility. Now it will apply by default only to hubs; for all other devices the user must enable it explicitly by setting the power/persist device attribute. The disconnect_all_children() routine in hub.c has been removed and its code placed inline. This is the way it was originally as part of hub_pre_reset(); the revised usage in hub_reset_resume() is sufficiently different that the code can no longer be shared. Likewise, mark_children_for_reset() is now inline as part of hub_reset_resume(). The end result looks much cleaner than before. The sysfs interface is updated to add the new attribute file, and there are corresponding documentation updates. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index bde8c65e2bfc..efce9a4c511c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -404,6 +404,7 @@ struct usb_device { unsigned auto_pm:1; /* autosuspend/resume in progress */ unsigned do_remote_wakeup:1; /* remote wakeup should be enabled */ unsigned reset_resume:1; /* needs reset instead of resume */ + unsigned persist_enabled:1; /* USB_PERSIST enabled for this dev */ unsigned autosuspend_disabled:1; /* autosuspend and autoresume */ unsigned autoresume_disabled:1; /* disabled by the user */ #endif -- cgit v1.2.3 From 8b3b01c898a44c2fc7217eb579982b9d132113f5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 13 Jun 2007 08:02:11 +0200 Subject: USB: Add URB_FREE_BUFFER flag and the logic behind it USB: Add URB_FREE_BUFFER flag for freeing the transfer buffer In some cases it is not needed that the driver keeps track of the transfer buffer of an URB. It can be simply freed along with the URB itself when the reference count goes down to zero. The new flag URB_FREE_BUFFER enables this behavior. Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index efce9a4c511c..533c32374e01 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -994,6 +994,7 @@ extern int usb_disabled(void); #define URB_ZERO_PACKET 0x0040 /* Finish bulk OUT with short packet */ #define URB_NO_INTERRUPT 0x0080 /* HINT: no non-error interrupt * needed */ +#define URB_FREE_BUFFER 0x0100 /* Free transfer buffer with the URB */ struct usb_iso_packet_descriptor { unsigned int offset; -- cgit v1.2.3 From 165fe97ed6107d3cde63592d5ac36400a5eb9f6f Mon Sep 17 00:00:00 2001 From: "Craig W. Nadler" Date: Fri, 15 Jun 2007 23:14:35 -0400 Subject: USB: add IAD support to usbfs and sysfs USB_IAD: Adds support for USB Interface Association Descriptors. This patch adds support to the USB host stack for parsing, storing, and displaying Interface Association Descriptors. In /proc/bus/usb/devices lines starting with A: show the fields in an IAD. In sysfs if an interface on a USB device is referenced by an IAD the following files will be added to the sysfs directory for that interface: iad_bFirstInterface, iad_bInterfaceCount, iad_bFunctionClass, and iad_bFunctionSubClass, iad_bFunctionProtocol Signed-off-by: Craig W. Nadler Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 533c32374e01..7a60946df3b6 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -146,6 +146,10 @@ struct usb_interface { * active alternate setting */ unsigned num_altsetting; /* number of alternate settings */ + /* If there is an interface association descriptor then it will list + * the associated interfaces */ + struct usb_interface_assoc_descriptor *intf_assoc; + int minor; /* minor number this interface is * bound to */ enum usb_interface_condition condition; /* state of binding */ @@ -175,6 +179,7 @@ void usb_put_intf(struct usb_interface *intf); /* this maximum is arbitrary */ #define USB_MAXINTERFACES 32 +#define USB_MAXIADS USB_MAXINTERFACES/2 /** * struct usb_interface_cache - long-term representation of a device interface @@ -245,6 +250,11 @@ struct usb_host_config { struct usb_config_descriptor desc; char *string; /* iConfiguration string, if present */ + + /* List of any Interface Association Descriptors in this + * configuration. */ + struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS]; + /* the interfaces associated with this configuration, * stored in no particular order */ struct usb_interface *interface[USB_MAXINTERFACES]; -- cgit v1.2.3 From 9d8bab58b758cd5a96d368a8cc64111c9ab50407 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 1 Jul 2007 11:04:54 -0700 Subject: usb gadget stack: remove usb_ep_*_buffer(), part 1 Remove usb_ep_{alloc,free}_buffer() calls, for small dma-coherent buffers. This patch just removes the interface and its users; later patches will remove controller driver support. - This interface is invariably not implemented correctly in the controller drivers (e.g. using dma pools, a mechanism which post-dates the interface by several years). - At this point no gadget driver really *needs* to use it. In current kernels, any driver that needs such a mechanism could allocate a dma pool themselves. Removing this interface is thus a simplification and improvement. Note that the gmidi.c driver had a bug in this area; fixed. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_gadget.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h index e17186dbcdca..703fd84c46fc 100644 --- a/include/linux/usb_gadget.h +++ b/include/linux/usb_gadget.h @@ -234,47 +234,6 @@ usb_ep_free_request (struct usb_ep *ep, struct usb_request *req) ep->ops->free_request (ep, req); } -/** - * usb_ep_alloc_buffer - allocate an I/O buffer - * @ep:the endpoint associated with the buffer - * @len:length of the desired buffer - * @dma:pointer to the buffer's DMA address; must be valid - * @gfp_flags:GFP_* flags to use - * - * Returns a new buffer, or null if one could not be allocated. - * The buffer is suitably aligned for dma, if that endpoint uses DMA, - * and the caller won't have to care about dma-inconsistency - * or any hidden "bounce buffer" mechanism. No additional per-request - * DMA mapping will be required for such buffers. - * Free it later with usb_ep_free_buffer(). - * - * You don't need to use this call to allocate I/O buffers unless you - * want to make sure drivers don't incur costs for such "bounce buffer" - * copies or per-request DMA mappings. - */ -static inline void * -usb_ep_alloc_buffer (struct usb_ep *ep, unsigned len, dma_addr_t *dma, - gfp_t gfp_flags) -{ - return ep->ops->alloc_buffer (ep, len, dma, gfp_flags); -} - -/** - * usb_ep_free_buffer - frees an i/o buffer - * @ep:the endpoint associated with the buffer - * @buf:CPU view address of the buffer - * @dma:the buffer's DMA address - * @len:length of the buffer - * - * reverses the effect of usb_ep_alloc_buffer(). - * caller guarantees the buffer will no longer be accessed - */ -static inline void -usb_ep_free_buffer (struct usb_ep *ep, void *buf, dma_addr_t dma, unsigned len) -{ - ep->ops->free_buffer (ep, buf, dma, len); -} - /** * usb_ep_queue - queues (submits) an I/O request to an endpoint. * @ep:the endpoint associated with the request -- cgit v1.2.3 From c67ab134ba9f83f9de86e58adfeaa14a9efa6e00 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Sun, 1 Jul 2007 12:21:00 -0700 Subject: usb gadget stack: remove usb_ep_*_buffer(), part 2 This patch removes controller driver infrastructure which supported the now-removed usb_ep_{alloc,free}_buffer() calls. As can be seen, many of the implementations of this were broken to various degrees. Many didn't properly return dma-coherent mappings; those which did so were necessarily ugly because of bogosity in the underlying dma_free_coherent() calls ... which on many platforms can't be called from the same contexts (notably in_irq) from which their dma_alloc_coherent() sibling can be called. The main potential downside of removing this is that gadget drivers wouldn't have specific knowledge that the controller drivers have: endpoints that aren't dma-capable don't need any dma mappings at all. Signed-off-by: David Brownell Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_gadget.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb_gadget.h b/include/linux/usb_gadget.h index 703fd84c46fc..4f59b2aa8a9e 100644 --- a/include/linux/usb_gadget.h +++ b/include/linux/usb_gadget.h @@ -110,13 +110,6 @@ struct usb_ep_ops { gfp_t gfp_flags); void (*free_request) (struct usb_ep *ep, struct usb_request *req); - void *(*alloc_buffer) (struct usb_ep *ep, unsigned bytes, - dma_addr_t *dma, gfp_t gfp_flags); - void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma, - unsigned bytes); - // NOTE: on 2.6, drivers may also use dma_map() and - // dma_sync_single_*() to directly manage dma overhead. - int (*queue) (struct usb_ep *ep, struct usb_request *req, gfp_t gfp_flags); int (*dequeue) (struct usb_ep *ep, struct usb_request *req); -- cgit v1.2.3 From 7405f74badf46b5d023c5d2b670b4471525f6c91 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 2 Jan 2007 11:10:43 -0700 Subject: dmaengine: refactor dmaengine around dma_async_tx_descriptor The current dmaengine interface defines mutliple routines per operation, i.e. dma_async_memcpy_buf_to_buf, dma_async_memcpy_buf_to_page etc. Adding more operation types (xor, crc, etc) to this model would result in an unmanageable number of method permutations. Are we really going to add a set of hooks for each DMA engine whizbang feature? - Jeff Garzik The descriptor creation process is refactored using the new common dma_async_tx_descriptor structure. Instead of per driver do___to_ methods, drivers integrate dma_async_tx_descriptor into their private software descriptor and then define a 'prep' routine per operation. The prep routine allocates a descriptor and ensures that the tx_set_src, tx_set_dest, tx_submit routines are valid. Descriptor creation and submission becomes: struct dma_device *dev; struct dma_chan *chan; struct dma_async_tx_descriptor *tx; tx = dev->device_prep_dma_(chan, len, int_flag) tx->tx_set_src(dma_addr_t, tx, index /* for multi-source ops */) tx->tx_set_dest(dma_addr_t, tx, index) tx->tx_submit(tx) In addition to the refactoring, dma_async_tx_descriptor also lays the groundwork for definining cross-channel-operation dependencies, and a callback facility for asynchronous notification of operation completion. Changelog: * drop dma mapping methods, suggested by Chris Leech * fix ioat_dma_dependency_added, also caught by Andrew Morton * fix dma_sync_wait, change from Andrew Morton * uninline large functions, change from Andrew Morton * add tx->callback = NULL to dmaengine calls to interoperate with async_tx calls * hookup ioat_tx_submit * convert channel capabilities to a 'cpumask_t like' bitmap * removed DMA_TX_ARRAY_INIT, no longer needed * checkpatch.pl fixes * make set_src, set_dest, and tx_submit descriptor specific methods * fixup git-ioat merge * move group_list and phys to dma_async_tx_descriptor Cc: Jeff Garzik Cc: Chris Leech Signed-off-by: Shannon Nelson Signed-off-by: Dan Williams Acked-by: David S. Miller --- include/linux/dmaengine.h | 237 +++++++++++++++++++++++++++++----------------- 1 file changed, 149 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c94d8f1d62e5..3de1cf71031a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -21,13 +21,12 @@ #ifndef DMAENGINE_H #define DMAENGINE_H -#ifdef CONFIG_DMA_ENGINE - #include #include #include #include #include +#include /** * enum dma_event - resource PNP/power managment events @@ -64,6 +63,31 @@ enum dma_status { DMA_ERROR, }; +/** + * enum dma_transaction_type - DMA transaction types/indexes + */ +enum dma_transaction_type { + DMA_MEMCPY, + DMA_XOR, + DMA_PQ_XOR, + DMA_DUAL_XOR, + DMA_PQ_UPDATE, + DMA_ZERO_SUM, + DMA_PQ_ZERO_SUM, + DMA_MEMSET, + DMA_MEMCPY_CRC32C, + DMA_INTERRUPT, +}; + +/* last transaction type for creation of the capabilities mask */ +#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) + +/** + * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. + * See linux/cpumask.h + */ +typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; + /** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting @@ -157,48 +181,106 @@ struct dma_client { struct list_head global_node; }; +typedef void (*dma_async_tx_callback)(void *dma_async_param); +/** + * struct dma_async_tx_descriptor - async transaction descriptor + * ---dma generic offload fields--- + * @cookie: tracking cookie for this transaction, set to -EBUSY if + * this tx is sitting on a dependency list + * @ack: the descriptor can not be reused until the client acknowledges + * receipt, i.e. has has a chance to establish any dependency chains + * @phys: physical address of the descriptor + * @tx_list: driver common field for operations that require multiple + * descriptors + * @chan: target channel for this operation + * @tx_submit: set the prepared descriptor(s) to be executed by the engine + * @tx_set_dest: set a destination address in a hardware descriptor + * @tx_set_src: set a source address in a hardware descriptor + * @callback: routine to call after this operation is complete + * @callback_param: general parameter to pass to the callback routine + * ---async_tx api specific fields--- + * @depend_list: at completion this list of transactions are submitted + * @depend_node: allow this transaction to be executed after another + * transaction has completed, possibly on another channel + * @parent: pointer to the next level up in the dependency chain + * @lock: protect the dependency list + */ +struct dma_async_tx_descriptor { + dma_cookie_t cookie; + int ack; + dma_addr_t phys; + struct list_head tx_list; + struct dma_chan *chan; + dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); + void (*tx_set_dest)(dma_addr_t addr, + struct dma_async_tx_descriptor *tx, int index); + void (*tx_set_src)(dma_addr_t addr, + struct dma_async_tx_descriptor *tx, int index); + dma_async_tx_callback callback; + void *callback_param; + struct list_head depend_list; + struct list_head depend_node; + struct dma_async_tx_descriptor *parent; + spinlock_t lock; +}; + /** * struct dma_device - info on the entity supplying DMA services * @chancnt: how many DMA channels are supported * @channels: the list of struct dma_chan * @global_node: list_head for global dma_device_list + * @cap_mask: one or more dma_capability flags + * @max_xor: maximum number of xor sources, 0 if no capability * @refcount: reference count * @done: IO completion struct * @dev_id: unique device ID + * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the * number of allocated descriptors * @device_free_chan_resources: release DMA channel's resources - * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer - * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page - * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset - * @device_memcpy_complete: poll the status of an IOAT DMA transaction - * @device_memcpy_issue_pending: push appended descriptors to hardware + * @device_prep_dma_memcpy: prepares a memcpy operation + * @device_prep_dma_xor: prepares a xor operation + * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_memset: prepares a memset operation + * @device_prep_dma_interrupt: prepares an end of chain interrupt operation + * @device_dependency_added: async_tx notifies the channel about new deps + * @device_issue_pending: push pending transactions to hardware */ struct dma_device { unsigned int chancnt; struct list_head channels; struct list_head global_node; + dma_cap_mask_t cap_mask; + int max_xor; struct kref refcount; struct completion done; int dev_id; + struct device *dev; int (*device_alloc_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan); - dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, - void *dest, void *src, size_t len); - dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, - struct page *page, unsigned int offset, void *kdata, - size_t len); - dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, - struct page *dest_pg, unsigned int dest_off, - struct page *src_pg, unsigned int src_off, size_t len); - enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, + + struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( + struct dma_chan *chan, size_t len, int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_xor)( + struct dma_chan *chan, unsigned int src_cnt, size_t len, + int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_chan *chan, unsigned int src_cnt, size_t len, + u32 *result, int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( + struct dma_chan *chan, int value, size_t len, int int_en); + struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( + struct dma_chan *chan); + + void (*device_dependency_added)(struct dma_chan *chan); + enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used); - void (*device_memcpy_issue_pending)(struct dma_chan *chan); + void (*device_issue_pending)(struct dma_chan *chan); }; /* --- public DMA engine API --- */ @@ -207,96 +289,72 @@ struct dma_client *dma_async_client_register(dma_event_callback event_callback); void dma_async_client_unregister(struct dma_client *client); void dma_async_client_chan_request(struct dma_client *client, unsigned int number); +dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, + void *dest, void *src, size_t len); +dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, + struct page *page, unsigned int offset, void *kdata, size_t len); +dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, + struct page *dest_pg, unsigned int dest_off, struct page *src_pg, + unsigned int src_off, size_t len); +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan); -/** - * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses - * @chan: DMA channel to offload copy to - * @dest: destination address (virtual) - * @src: source address (virtual) - * @len: length - * - * Both @dest and @src must be mappable to a bus address according to the - * DMA mapping API rules for streaming mappings. - * Both @dest and @src must stay memory resident (kernel memory or locked - * user space pages). - */ -static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, - void *dest, void *src, size_t len) -{ - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); - return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); +static inline void +async_tx_ack(struct dma_async_tx_descriptor *tx) +{ + tx->ack = 1; } -/** - * dma_async_memcpy_buf_to_pg - offloaded copy from address to page - * @chan: DMA channel to offload copy to - * @page: destination page - * @offset: offset in page to copy to - * @kdata: source address (virtual) - * @len: length - * - * Both @page/@offset and @kdata must be mappable to a bus address according - * to the DMA mapping API rules for streaming mappings. - * Both @page/@offset and @kdata must stay memory resident (kernel memory or - * locked user space pages) - */ -static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, - struct page *page, unsigned int offset, void *kdata, size_t len) +#define first_dma_cap(mask) __first_dma_cap(&(mask)) +static inline int __first_dma_cap(const dma_cap_mask_t *srcp) { - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); + return min_t(int, DMA_TX_TYPE_END, + find_first_bit(srcp->bits, DMA_TX_TYPE_END)); +} - return chan->device->device_memcpy_buf_to_pg(chan, page, offset, - kdata, len); +#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask)) +static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp) +{ + return min_t(int, DMA_TX_TYPE_END, + find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1)); } -/** - * dma_async_memcpy_pg_to_pg - offloaded copy from page to page - * @chan: DMA channel to offload copy to - * @dest_pg: destination page - * @dest_off: offset in page to copy to - * @src_pg: source page - * @src_off: offset in page to copy from - * @len: length - * - * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus - * address according to the DMA mapping API rules for streaming mappings. - * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident - * (kernel memory or locked user space pages). - */ -static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, - struct page *dest_pg, unsigned int dest_off, struct page *src_pg, - unsigned int src_off, size_t len) +#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask)) +static inline void +__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) { - int cpu = get_cpu(); - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; - per_cpu_ptr(chan->local, cpu)->memcpy_count++; - put_cpu(); + set_bit(tx_type, dstp->bits); +} - return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, - src_pg, src_off, len); +#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) +static inline int +__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) +{ + return test_bit(tx_type, srcp->bits); } +#define for_each_dma_cap_mask(cap, mask) \ + for ((cap) = first_dma_cap(mask); \ + (cap) < DMA_TX_TYPE_END; \ + (cap) = next_dma_cap((cap), (mask))) + /** - * dma_async_memcpy_issue_pending - flush pending copies to HW + * dma_async_issue_pending - flush pending transactions to HW * @chan: target DMA channel * * This allows drivers to push copies to HW in batches, * reducing MMIO writes where possible. */ -static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) +static inline void dma_async_issue_pending(struct dma_chan *chan) { - return chan->device->device_memcpy_issue_pending(chan); + return chan->device->device_issue_pending(chan); } +#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) + /** - * dma_async_memcpy_complete - poll for transaction completion + * dma_async_is_tx_complete - poll for transaction completion * @chan: DMA channel * @cookie: transaction identifier to check status of * @last: returns last completed cookie, can be NULL @@ -306,12 +364,15 @@ static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) * internal state and can be used with dma_async_is_complete() to check * the status of multiple cookies without re-checking hardware state. */ -static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, +static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) { - return chan->device->device_memcpy_complete(chan, cookie, last, used); + return chan->device->device_is_tx_complete(chan, cookie, last, used); } +#define dma_async_memcpy_complete(chan, cookie, last, used)\ + dma_async_is_tx_complete(chan, cookie, last, used) + /** * dma_async_is_complete - test a cookie against chan state * @cookie: transaction identifier to test status of @@ -334,6 +395,7 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie, return DMA_IN_PROGRESS; } +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); /* --- DMA device --- */ @@ -362,5 +424,4 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, struct dma_pinned_list *pinned_list, struct page *page, unsigned int offset, size_t len); -#endif /* CONFIG_DMA_ENGINE */ #endif /* DMAENGINE_H */ -- cgit v1.2.3 From d379b01e9087a582d58f4b678208a4f8d8376fe7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jul 2007 11:56:42 -0700 Subject: dmaengine: make clients responsible for managing channels The current implementation assumes that a channel will only be used by one client at a time. In order to enable channel sharing the dmaengine core is changed to a model where clients subscribe to channel-available-events. Instead of tracking how many channels a client wants and how many it has received the core just broadcasts the available channels and lets the clients optionally take a reference. The core learns about the clients' needs at dma_event_callback time. In support of multiple operation types, clients can specify a capability mask to only be notified of channels that satisfy a certain set of capabilities. Changelog: * removed DMA_TX_ARRAY_INIT, no longer needed * dma_client_chan_free -> dma_chan_release: switch to global reference counting only at device unregistration time, before it was also happening at client unregistration time * clients now return dma_state_client to dmaengine (ack, dup, nak) * checkpatch.pl fixes * fixup merge with git-ioat Cc: Chris Leech Signed-off-by: Shannon Nelson Signed-off-by: Dan Williams Acked-by: David S. Miller --- include/linux/dmaengine.h | 58 ++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3de1cf71031a..a3b6035b6c86 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -29,19 +29,31 @@ #include /** - * enum dma_event - resource PNP/power managment events + * enum dma_state - resource PNP/power managment state * @DMA_RESOURCE_SUSPEND: DMA device going into low power state * @DMA_RESOURCE_RESUME: DMA device returning to full power - * @DMA_RESOURCE_ADDED: DMA device added to the system + * @DMA_RESOURCE_AVAILABLE: DMA device available to the system * @DMA_RESOURCE_REMOVED: DMA device removed from the system */ -enum dma_event { +enum dma_state { DMA_RESOURCE_SUSPEND, DMA_RESOURCE_RESUME, - DMA_RESOURCE_ADDED, + DMA_RESOURCE_AVAILABLE, DMA_RESOURCE_REMOVED, }; +/** + * enum dma_state_client - state of the channel in the client + * @DMA_ACK: client would like to use, or was using this channel + * @DMA_DUP: client has already seen this channel, or is not using this channel + * @DMA_NAK: client does not want to see any more channels + */ +enum dma_state_client { + DMA_ACK, + DMA_DUP, + DMA_NAK, +}; + /** * typedef dma_cookie_t - an opaque DMA cookie * @@ -104,7 +116,6 @@ struct dma_chan_percpu { /** * struct dma_chan - devices supply DMA channels, clients use them - * @client: ptr to the client user of this chan, will be %NULL when unused * @device: ptr to the dma device who supplies this channel, always !%NULL * @cookie: last cookie value returned to client * @chan_id: channel ID for sysfs @@ -112,12 +123,10 @@ struct dma_chan_percpu { * @refcount: kref, used in "bigref" slow-mode * @slow_ref: indicates that the DMA channel is free * @rcu: the DMA channel's RCU head - * @client_node: used to add this to the client chan list * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu */ struct dma_chan { - struct dma_client *client; struct dma_device *device; dma_cookie_t cookie; @@ -129,11 +138,11 @@ struct dma_chan { int slow_ref; struct rcu_head rcu; - struct list_head client_node; struct list_head device_node; struct dma_chan_percpu *local; }; + void dma_chan_cleanup(struct kref *kref); static inline void dma_chan_get(struct dma_chan *chan) @@ -158,26 +167,31 @@ static inline void dma_chan_put(struct dma_chan *chan) /* * typedef dma_event_callback - function pointer to a DMA event callback + * For each channel added to the system this routine is called for each client. + * If the client would like to use the channel it returns '1' to signal (ack) + * the dmaengine core to take out a reference on the channel and its + * corresponding device. A client must not 'ack' an available channel more + * than once. When a channel is removed all clients are notified. If a client + * is using the channel it must 'ack' the removal. A client must not 'ack' a + * removed channel more than once. + * @client - 'this' pointer for the client context + * @chan - channel to be acted upon + * @state - available or removed */ -typedef void (*dma_event_callback) (struct dma_client *client, - struct dma_chan *chan, enum dma_event event); +struct dma_client; +typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, + struct dma_chan *chan, enum dma_state state); /** * struct dma_client - info on the entity making use of DMA services * @event_callback: func ptr to call when something happens - * @chan_count: number of chans allocated - * @chans_desired: number of chans requested. Can be +/- chan_count - * @lock: protects access to the channels list - * @channels: the list of DMA channels allocated + * @cap_mask: only return channels that satisfy the requested capabilities + * a value of zero corresponds to any capability * @global_node: list_head for global dma_client_list */ struct dma_client { dma_event_callback event_callback; - unsigned int chan_count; - unsigned int chans_desired; - - spinlock_t lock; - struct list_head channels; + dma_cap_mask_t cap_mask; struct list_head global_node; }; @@ -285,10 +299,9 @@ struct dma_device { /* --- public DMA engine API --- */ -struct dma_client *dma_async_client_register(dma_event_callback event_callback); +void dma_async_client_register(struct dma_client *client); void dma_async_client_unregister(struct dma_client *client); -void dma_async_client_chan_request(struct dma_client *client, - unsigned int number); +void dma_async_client_chan_request(struct dma_client *client); dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, @@ -299,7 +312,6 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan); - static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) { -- cgit v1.2.3 From 685784aaf3cd0e3ff5e36c7ecf6f441cdbf57f73 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jul 2007 11:56:42 -0700 Subject: xor: make 'xor_blocks' a library routine for use with async_tx The async_tx api tries to use a dma engine for an operation, but will fall back to an optimized software routine otherwise. Xor support is implemented using the raid5 xor routines. For organizational purposes this routine is moved to a common area. The following fixes are also made: * rename xor_block => xor_blocks, suggested by Adrian Bunk * ensure that xor.o initializes before md.o in the built-in case * checkpatch.pl fixes * mark calibrate_xor_blocks __init, Adrian Bunk Cc: Adrian Bunk Cc: NeilBrown Cc: Herbert Xu Signed-off-by: Dan Williams --- include/linux/raid/xor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index f0d67cbdea40..7d6c20b654fa 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -5,7 +5,7 @@ #define MAX_XOR_BLOCKS 5 -extern void xor_block(unsigned int count, unsigned int bytes, void **ptr); +extern void xor_blocks(unsigned int count, unsigned int bytes, void **ptr); struct xor_block_template { struct xor_block_template *next; -- cgit v1.2.3 From 9bc89cd82d6f88fb0ca39b30445c329a430fd66b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 2 Jan 2007 11:10:44 -0700 Subject: async_tx: add the async_tx api The async_tx api provides methods for describing a chain of asynchronous bulk memory transfers/transforms with support for inter-transactional dependencies. It is implemented as a dmaengine client that smooths over the details of different hardware offload engine implementations. Code that is written to the api can optimize for asynchronous operation and the api will fit the chain of operations to the available offload resources. I imagine that any piece of ADMA hardware would register with the 'async_*' subsystem, and a call to async_X would be routed as appropriate, or be run in-line. - Neil Brown async_tx exploits the capabilities of struct dma_async_tx_descriptor to provide an api of the following general format: struct dma_async_tx_descriptor * async_(..., struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_param) { struct dma_chan *chan = async_tx_find_channel(depend_tx, ); struct dma_device *device = chan ? chan->device : NULL; int int_en = cb_fn ? 1 : 0; struct dma_async_tx_descriptor *tx = device ? device->device_prep_dma_(chan, len, int_en) : NULL; if (tx) { /* run asynchronously */ ... tx->tx_set_dest(addr, tx, index); ... tx->tx_set_src(addr, tx, index); ... async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); } else { /* run synchronously */ ... ... async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); } return tx; } async_tx_find_channel() returns a capable channel from its pool. The channel pool is organized as a per-cpu array of channel pointers. The async_tx_rebalance() routine is tasked with managing these arrays. In the uniprocessor case async_tx_rebalance() tries to spread responsibility evenly over channels of similar capabilities. For example if there are two copy+xor channels, one will handle copy operations and the other will handle xor. In the SMP case async_tx_rebalance() attempts to spread the operations evenly over the cpus, e.g. cpu0 gets copy channel0 and xor channel0 while cpu1 gets copy channel 1 and xor channel 1. When a dependency is specified async_tx_find_channel defaults to keeping the operation on the same channel. A xor->copy->xor chain will stay on one channel if it supports both operation types, otherwise the transaction will transition between a copy and a xor resource. Currently the raid5 implementation in the MD raid456 driver has been converted to the async_tx api. A driver for the offload engines on the Intel Xscale series of I/O processors, iop-adma, is provided in a later commit. With the iop-adma driver and async_tx, raid456 is able to offload copy, xor, and xor-zero-sum operations to hardware engines. On iop342 tiobench showed higher throughput for sequential writes (20 - 30% improvement) and sequential reads to a degraded array (40 - 55% improvement). For the other cases performance was roughly equal, +/- a few percentage points. On a x86-smp platform the performance of the async_tx implementation (in synchronous mode) was also +/- a few percentage points of the original implementation. According to 'top' on iop342 CPU utilization drops from ~50% to ~15% during a 'resync' while the speed according to /proc/mdstat doubles from ~25 MB/s to ~50 MB/s. The tiobench command line used for testing was: tiobench --size 2048 --block 4096 --block 131072 --dir /mnt/raid --numruns 5 * iop342 had 1GB of memory available Details: * if CONFIG_DMA_ENGINE=n the asynchronous path is compiled away by making async_tx_find_channel a static inline routine that always returns NULL * when a callback is specified for a given transaction an interrupt will fire at operation completion time and the callback will occur in a tasklet. if the the channel does not support interrupts then a live polling wait will be performed * the api is written as a dmaengine client that requests all available channels * In support of dependencies the api implicitly schedules channel-switch interrupts. The interrupt triggers the cleanup tasklet which causes pending operations to be scheduled on the next channel * Xor engines treat an xor destination address differently than a software xor routine. To the software routine the destination address is an implied source, whereas engines treat it as a write-only destination. This patch modifies the xor_blocks routine to take a an explicit destination address to mirror the hardware. Changelog: * fixed a leftover debug print * don't allow callbacks in async_interrupt_cond * fixed xor_block changes * fixed usage of ASYNC_TX_XOR_DROP_DEST * drop dma mapping methods, suggested by Chris Leech * printk warning fixups from Andrew Morton * don't use inline in C files, Adrian Bunk * select the API when MD is enabled * BUG_ON xor source counts <= 1 * implicitly handle hardware concerns like channel switching and interrupts, Neil Brown * remove the per operation type list, and distribute operation capabilities evenly amongst the available channels * simplify async_tx_find_channel to optimize the fast path * introduce the channel_table_initialized flag to prevent early calls to the api * reorganize the code to mimic crypto * include mm.h as not all archs include it in dma-mapping.h * make the Kconfig options non-user visible, Adrian Bunk * move async_tx under crypto since it is meant as 'core' functionality, and the two may share algorithms in the future * move large inline functions into c files * checkpatch.pl fixes * gpl v2 only correction Cc: Herbert Xu Signed-off-by: Dan Williams Acked-By: NeilBrown --- include/linux/async_tx.h | 156 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/raid/xor.h | 5 +- 2 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 include/linux/async_tx.h (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h new file mode 100644 index 000000000000..ff1255079fa1 --- /dev/null +++ b/include/linux/async_tx.h @@ -0,0 +1,156 @@ +/* + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#ifndef _ASYNC_TX_H_ +#define _ASYNC_TX_H_ +#include +#include +#include + +/** + * dma_chan_ref - object used to manage dma channels received from the + * dmaengine core. + * @chan - the channel being tracked + * @node - node for the channel to be placed on async_tx_master_list + * @rcu - for list_del_rcu + * @count - number of times this channel is listed in the pool + * (for channels with multiple capabiities) + */ +struct dma_chan_ref { + struct dma_chan *chan; + struct list_head node; + struct rcu_head rcu; + atomic_t count; +}; + +/** + * async_tx_flags - modifiers for the async_* calls + * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the + * the destination address is not a source. The asynchronous case handles this + * implicitly, the synchronous case needs to zero the destination block. + * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is + * also one of the source addresses. In the synchronous case the destination + * address is an implied source, whereas the asynchronous case it must be listed + * as a source. The destination address must be the first address in the source + * array. + * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations + * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a + * dependency chain + * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. + * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously + * take an atomic mapping (KM_USER0) on the source page(s) + * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously + * take an atomic mapping (KM_USER0) on the dest page(s) + */ +enum async_tx_flags { + ASYNC_TX_XOR_ZERO_DST = (1 << 0), + ASYNC_TX_XOR_DROP_DST = (1 << 1), + ASYNC_TX_ASSUME_COHERENT = (1 << 2), + ASYNC_TX_ACK = (1 << 3), + ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_KMAP_SRC = (1 << 5), + ASYNC_TX_KMAP_DST = (1 << 6), +}; + +#ifdef CONFIG_DMA_ENGINE +void async_tx_issue_pending_all(void); +enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); +void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); +struct dma_chan * +async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, + enum dma_transaction_type tx_type); +#else +static inline void async_tx_issue_pending_all(void) +{ + do { } while (0); +} + +static inline enum dma_status +dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) +{ + return DMA_SUCCESS; +} + +static inline void +async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, + struct dma_chan *host_chan) +{ + do { } while (0); +} + +static inline struct dma_chan * +async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, + enum dma_transaction_type tx_type) +{ + return NULL; +} +#endif + +/** + * async_tx_sync_epilog - actions to take if an operation is run synchronously + * @flags: async_tx flags + * @depend_tx: transaction depends on depend_tx + * @cb_fn: function to call when the transaction completes + * @cb_fn_param: parameter to pass to the callback routine + */ +static inline void +async_tx_sync_epilog(unsigned long flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param) +{ + if (cb_fn) + cb_fn(cb_fn_param); + + if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + async_tx_ack(depend_tx); +} + +void +async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_xor(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_xor_zero_sum(struct page *dest, struct page **src_list, + unsigned int offset, int src_cnt, size_t len, + u32 *result, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, + unsigned int src_offset, size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_memset(struct page *dest, int val, unsigned int offset, + size_t len, enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); + +struct dma_async_tx_descriptor * +async_trigger_callback(enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_fn_param); +#endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h index 7d6c20b654fa..3e120587eada 100644 --- a/include/linux/raid/xor.h +++ b/include/linux/raid/xor.h @@ -3,9 +3,10 @@ #include -#define MAX_XOR_BLOCKS 5 +#define MAX_XOR_BLOCKS 4 -extern void xor_blocks(unsigned int count, unsigned int bytes, void **ptr); +extern void xor_blocks(unsigned int count, unsigned int bytes, + void *dest, void **srcs); struct xor_block_template { struct xor_block_template *next; -- cgit v1.2.3 From a445685647e825c713175d180ffc8dd54d90589b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 9 Jul 2007 11:56:43 -0700 Subject: raid5: refactor handle_stripe5 and handle_stripe6 (v3) handle_stripe5 and handle_stripe6 have very deep logic paths handling the various states of a stripe_head. By introducing the 'stripe_head_state' and 'r6_state' objects, large portions of the logic can be moved to sub-routines. 'struct stripe_head_state' consumes all of the automatic variables that previously stood alone in handle_stripe5,6. 'struct r6_state' contains the handle_stripe6 specific variables like p_failed and q_failed. One of the nice side effects of the 'stripe_head_state' change is that it allows for further reductions in code duplication between raid5 and raid6. The following new routines are shared between raid5 and raid6: handle_completed_write_requests handle_requests_to_failed_array handle_stripe_expansion Changes: * v2: fixed 'conf->raid_disk-1' for the raid6 'handle_stripe_expansion' path * v3: removed the unused 'dirty' field from struct stripe_head_state * v3: coalesced open coded bi_end_io routines into return_io() Signed-off-by: Dan Williams Acked-By: NeilBrown --- include/linux/raid/raid5.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index d8286db60b96..b99d354f6128 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -145,6 +145,22 @@ struct stripe_head { unsigned long flags; } dev[1]; /* allocated with extra space depending of RAID geometry */ }; + +/* stripe_head_state - collects and tracks the dynamic state of a stripe_head + * for handle_stripe. It is only valid under spin_lock(sh->lock); + */ +struct stripe_head_state { + int syncing, expanding, expanded; + int locked, uptodate, to_read, to_write, failed, written; + int non_overwrite; + int failed_num; +}; + +/* r6_state - extra state data only relevant to r6 */ +struct r6_state { + int p_failed, q_failed, qd_idx, failed_num[2]; +}; + /* Flags */ #define R5_UPTODATE 0 /* page contains current data */ #define R5_LOCKED 1 /* IO has been submitted on "req" */ -- cgit v1.2.3 From 91c00924846a0034020451c280c76baa4299f9dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 2 Jan 2007 13:52:30 -0700 Subject: md: raid5_run_ops - run stripe operations outside sh->lock When the raid acceleration work was proposed, Neil laid out the following attack plan: 1/ move the xor and copy operations outside spin_lock(&sh->lock) 2/ find/implement an asynchronous offload api The raid5_run_ops routine uses the asynchronous offload api (async_tx) and the stripe_operations member of a stripe_head to carry out xor+copy operations asynchronously, outside the lock. To perform operations outside the lock a new set of state flags is needed to track new requests, in-flight requests, and completed requests. In this new model handle_stripe is tasked with scanning the stripe_head for work, updating the stripe_operations structure, and finally dropping the lock and calling raid5_run_ops for processing. The following flags outline the requests that handle_stripe can make of raid5_run_ops: STRIPE_OP_BIOFILL - copy data into request buffers to satisfy a read request STRIPE_OP_COMPUTE_BLK - generate a missing block in the cache from the other blocks STRIPE_OP_PREXOR - subtract existing data as part of the read-modify-write process STRIPE_OP_BIODRAIN - copy data out of request buffers to satisfy a write request STRIPE_OP_POSTXOR - recalculate parity for new data that has entered the cache STRIPE_OP_CHECK - verify that the parity is correct STRIPE_OP_IO - submit i/o to the member disks (note this was already performed outside the stripe lock, but it made sense to add it as an operation type The flow is: 1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending 2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the operation to the async_tx api 3/ async_tx triggers the completion callback routine to set sh->ops.complete and release the stripe 4/ handle_stripe runs again to finish the operation and optionally submit new operations that were previously blocked Note this patch just defines raid5_run_ops, subsequent commits (one per major operation type) modify handle_stripe to take advantage of this routine. Changelog: * removed ops_complete_biodrain in favor of ops_complete_postxor and ops_complete_write. * removed the raid5_run_ops workqueue * call bi_end_io for reads in ops_complete_biofill, saves a call to handle_stripe * explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown * fix race between async engines and bi_end_io call for reads, Neil Brown * remove unnecessary spin_lock from ops_complete_biofill * remove test_and_set/test_and_clear BUG_ONs, Neil Brown * remove explicit interrupt handling for channel switching, this feature was absorbed (i.e. it is now implicit) by the async_tx api * use return_io in ops_complete_biofill Signed-off-by: Dan Williams Acked-By: NeilBrown --- include/linux/raid/raid5.h | 81 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index b99d354f6128..6fb9d94e6f2e 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -116,13 +116,46 @@ * attach a request to an active stripe (add_stripe_bh()) * lockdev attach-buffer unlockdev * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io + * lockstripe clrSTRIPE_HANDLE ... + * (lockdev check-buffers unlockdev) .. + * change-state .. + * record io/ops needed unlockstripe schedule io/ops * release an active stripe (release_stripe()) * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev * * The refcount counts each thread that have activated the stripe, * plus raid5d if it is handling it, plus one for each active request - * on a cached buffer. + * on a cached buffer, and plus one if the stripe is undergoing stripe + * operations. + * + * Stripe operations are performed outside the stripe lock, + * the stripe operations are: + * -copying data between the stripe cache and user application buffers + * -computing blocks to save a disk access, or to recover a missing block + * -updating the parity on a write operation (reconstruct write and + * read-modify-write) + * -checking parity correctness + * -running i/o to disk + * These operations are carried out by raid5_run_ops which uses the async_tx + * api to (optionally) offload operations to dedicated hardware engines. + * When requesting an operation handle_stripe sets the pending bit for the + * operation and increments the count. raid5_run_ops is then run whenever + * the count is non-zero. + * There are some critical dependencies between the operations that prevent some + * from being requested while another is in flight. + * 1/ Parity check operations destroy the in cache version of the parity block, + * so we prevent parity dependent operations like writes and compute_blocks + * from starting while a check is in progress. Some dma engines can perform + * the check without damaging the parity block, in these cases the parity + * block is re-marked up to date (assuming the check was successful) and is + * not re-read from disk. + * 2/ When a write operation is requested we immediately lock the affected + * blocks, and mark them as not up to date. This causes new read requests + * to be held off, as well as parity checks and compute block operations. + * 3/ Once a compute block operation has been requested handle_stripe treats + * that block as if it is up to date. raid5_run_ops guaruntees that any + * operation that is dependent on the compute block result is initiated after + * the compute block completes. */ struct stripe_head { @@ -136,11 +169,26 @@ struct stripe_head { spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ + /* stripe_operations + * @pending - pending ops flags (set for request->issue->complete) + * @ack - submitted ops flags (set for issue->complete) + * @complete - completed ops flags (set for complete) + * @target - STRIPE_OP_COMPUTE_BLK target + * @count - raid5_runs_ops is set to run when this is non-zero + */ + struct stripe_operations { + unsigned long pending; + unsigned long ack; + unsigned long complete; + int target; + int count; + u32 zero_sum_result; + } ops; struct r5dev { struct bio req; struct bio_vec vec; struct page *page; - struct bio *toread, *towrite, *written; + struct bio *toread, *read, *towrite, *written; sector_t sector; /* sector of this page */ unsigned long flags; } dev[1]; /* allocated with extra space depending of RAID geometry */ @@ -174,6 +222,15 @@ struct r6_state { #define R5_ReWrite 9 /* have tried to over-write the readerror */ #define R5_Expanded 10 /* This block now has post-expand data */ +#define R5_Wantcompute 11 /* compute_block in progress treat as + * uptodate + */ +#define R5_Wantfill 12 /* dev->toread contains a bio that needs + * filling + */ +#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from + * other "towrites" + */ /* * Write method */ @@ -195,6 +252,24 @@ struct r6_state { #define STRIPE_EXPANDING 9 #define STRIPE_EXPAND_SOURCE 10 #define STRIPE_EXPAND_READY 11 +/* + * Operations flags (in issue order) + */ +#define STRIPE_OP_BIOFILL 0 +#define STRIPE_OP_COMPUTE_BLK 1 +#define STRIPE_OP_PREXOR 2 +#define STRIPE_OP_BIODRAIN 3 +#define STRIPE_OP_POSTXOR 4 +#define STRIPE_OP_CHECK 5 +#define STRIPE_OP_IO 6 + +/* modifiers to the base operations + * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back + * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check + */ +#define STRIPE_OP_MOD_REPAIR_PD 7 +#define STRIPE_OP_MOD_DMA_CHECK 8 + /* * Plugging: * -- cgit v1.2.3 From f38e12199a94ca458e4f03c5a2c984fb80adadc5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 2 Jan 2007 13:52:30 -0700 Subject: md: handle_stripe5 - add request/completion logic for async compute ops handle_stripe will compute a block when a backing disk has failed, or when it determines it can save a disk read by computing the block from all the other up-to-date blocks. Previously a block would be computed under the lock and subsequent logic in handle_stripe could use the newly up-to-date block. With the raid5_run_ops implementation the compute operation is carried out a later time outside the lock. To preserve the old functionality we take advantage of the dependency chain feature of async_tx to flag the block as R5_Wantcompute and then let other parts of handle_stripe operate on the block as if it were up-to-date. raid5_run_ops guarantees that the block will be ready before it is used in another operation. However, this only works in cases where the compute and the dependent operation are scheduled at the same time. If a previous call to handle_stripe sets the R5_Wantcompute flag there is no facility to pass the async_tx dependency chain across successive calls to raid5_run_ops. The req_compute variable protects against this case. Changelog: * remove the req_compute BUG_ON Signed-off-by: Dan Williams Acked-By: NeilBrown --- include/linux/raid/raid5.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 6fb9d94e6f2e..2293015de1d5 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -200,7 +200,7 @@ struct stripe_head { struct stripe_head_state { int syncing, expanding, expanded; int locked, uptodate, to_read, to_write, failed, written; - int non_overwrite; + int compute, req_compute, non_overwrite; int failed_num; }; -- cgit v1.2.3 From b5e98d65d34a1c11a2135ea8a9b2619dbc7216c8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 2 Jan 2007 13:52:31 -0700 Subject: md: handle_stripe5 - add request/completion logic for async read ops When a read bio is attached to the stripe and the corresponding block is marked R5_UPTODATE, then a read (biofill) operation is scheduled to copy the data from the stripe cache to the bio buffer. handle_stripe flags the blocks to be operated on with the R5_Wantfill flag. If new read requests arrive while raid5_run_ops is running they will not be handled until handle_stripe is scheduled to run again. Changelog: * cleanup to_read and to_fill accounting * do not fail reads that have reached the cache Signed-off-by: Dan Williams Acked-By: NeilBrown --- include/linux/raid/raid5.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 2293015de1d5..93678f57ccbe 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -200,7 +200,7 @@ struct stripe_head { struct stripe_head_state { int syncing, expanding, expanded; int locked, uptodate, to_read, to_write, failed, written; - int compute, req_compute, non_overwrite; + int to_fill, compute, req_compute, non_overwrite; int failed_num; }; -- cgit v1.2.3 From 3039f0735a280b54c7364fbfe6a9287f7f0b510a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 13 Jul 2007 08:06:19 -0700 Subject: ioatdma: add the unisys "i/oat" pci vendor/device id Cc: John Magolan Signed-off-by: Shannon Nelson Signed-off-by: Dan Williams --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5b1c9994f89a..0275f6917c8e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -475,6 +475,9 @@ #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 +#define PCI_VENDOR_ID_UNISYS 0x1018 +#define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C + #define PCI_VENDOR_ID_COMPEX2 0x101a /* pci.ids says "AT&T GIS (NCR)" */ #define PCI_DEVICE_ID_COMPEX2_100VG 0x0005 -- cgit v1.2.3 From f787a50306680c187cf2896a8017937c1bf6dc7e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Jul 2007 21:21:47 +0200 Subject: [PATCH] sched: small topology.h cleanup trivial cleanup: LOCAL_DISTANCE and REMOTE_DISTANCE are only used in topology.h and inside an #ifndef section - limit their existence to that #ifndef. Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index da6c39b2d051..d0890a7e5bab 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -50,10 +50,10 @@ for_each_online_node(node) \ if (nr_cpus_node(node)) -#ifndef node_distance /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 #define REMOTE_DISTANCE 20 +#ifndef node_distance #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif #ifndef RECLAIM_DISTANCE -- cgit v1.2.3 From 24023451c8df726692e2f52288a20870d13b501f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:51:31 -0700 Subject: [NET]: Add net_device change_rx_mode callback Currently the set_multicast_list (and set_rx_mode) callbacks are responsible for configuring the device according to the IFF_PROMISC, IFF_MULTICAST and IFF_ALLMULTI flags and the mc_list (and uc_list in case of set_rx_mode). These callbacks can be invoked from BH context without the rtnl_mutex by dev_mc_add/dev_mc_delete, which makes reading the device flags and promiscous/allmulti count racy. For real hardware drivers that just commit all changes to the hardware this is not a real problem since the stack guarantees to call them for every change, so at least the final call will not race and commit the correct configuration to the hardware. For software devices that want to synchronize promiscous and multicast state to an underlying device however this can cause corruption of the underlying device's flags or promisc/allmulti counts. When the software device is concurrently put in promiscous or allmulti mode while set_multicast_list is invoked from bottem half context, the device might synchronize the change to the underlying device without holding the rtnl_mutex, which races with concurrent changes to the underlying device. Add a dev->change_rx_flags hook that is invoked when any of the flags that affect rx filtering change (under the rtnl_mutex), which allows drivers to perform synchronization immediately and only synchronize the address lists in set_multicast_list/set_rx_mode. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 79cc3dab4be7..f193aba30384 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -516,6 +516,9 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_CHANGE_RX_FLAGS + void (*change_rx_flags)(struct net_device *dev, + int flags); #define HAVE_SET_RX_MODE void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST -- cgit v1.2.3 From a0a400d79e3dd7843e7e81baa3ef2957bdc292d0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:02 -0700 Subject: [NET]: dev_mcast: add multicast list synchronization helpers The method drivers currently use to synchronize multicast lists is not very pretty: - walk the multicast list - search each entry on a copy of the previous list - if new add to lower device - walk the copy of the previous list - search each entry on the current list - if removed delete from lower device - copy entire list This patch adds a new field to struct dev_addr_list to store the synchronization state and adds two helper functions for synchronization and cleanup. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f193aba30384..e5af458ab04b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -190,6 +190,7 @@ struct dev_addr_list struct dev_addr_list *next; u8 da_addr[MAX_ADDR_LEN]; u8 da_addrlen; + u8 da_synced; int da_users; int da_gusers; }; @@ -1103,6 +1104,8 @@ extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); +extern int dev_mc_sync(struct net_device *to, struct net_device *from); +extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); -- cgit v1.2.3 From 6c78dcbd47a68a7d25d2bee7a6c74b9136cb5fde Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:56 -0700 Subject: [VLAN]: Fix promiscous/allmulti synchronization races The set_multicast_list function may be called without holding the rtnl mutex, resulting in races when changing the underlying device's promiscous and allmulti state. Use the change_rx_mode hook, which is always invoked under the rtnl. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 61a57dc2ac99..7f71df4c952f 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,8 +132,6 @@ struct vlan_dev_info { * made, in order to feed the right changes down * to the real hardware... */ - int old_allmulti; /* similar to above. */ - int old_promiscuity; /* similar to above. */ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ -- cgit v1.2.3 From 56addd6eeeb4e11f5a0af7093ca078e0f29140e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:53:28 -0700 Subject: [VLAN]: Use multicast list synchronization helpers Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7f71df4c952f..f8443fdb124a 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -127,11 +127,6 @@ struct vlan_dev_info { * like DHCP that use packet-filtering and don't understand * 802.1Q */ - struct dev_mc_list *old_mc_list; /* old multi-cast list for the VLAN interface.. - * we save this so we can tell what changes were - * made, in order to feed the right changes down - * to the real hardware... - */ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ -- cgit v1.2.3 From b863ceb7ddcea8c55fcf1d7b2ac591d50aa7ed53 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:55:06 -0700 Subject: [NET]: Add macvlan driver Add macvlan driver, which allows to create virtual ethernet devices based on MAC address. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 9 +++++++++ include/linux/netdevice.h | 2 ++ 2 files changed, 11 insertions(+) create mode 100644 include/linux/if_macvlan.h (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 000000000000..0d9d7ea2c1cc --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +#ifdef __KERNEL__ + +extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5af458ab04b..322b5eae57dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -564,6 +564,8 @@ struct net_device /* bridge stuff */ struct net_bridge_port *br_port; + /* macvlan */ + struct macvlan_port *macvlan_port; /* class/net/name entry */ struct device dev; -- cgit v1.2.3 From 6460d948f3ebf7d5040328a60a0ab7221f69945b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 14 Jul 2007 19:07:52 -0700 Subject: [NET]: Add ethtool support for NETIF_F_IPV6_CSUM devices. Add ethtool utility function to set or clear IPV6_CSUM feature flag. Modify tg3.c and bnx2.c to use this function when doing ethtool -K to change tx checksum. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f2d248f8cc92..3a632244f31b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -265,6 +265,7 @@ u32 ethtool_op_get_link(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data); u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); -- cgit v1.2.3 From 370786f9cfd430cb424f00ce4110e75bb1b95a19 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 14 Jul 2007 20:47:26 -0700 Subject: [NETFILTER]: x_tables: add connlimit match ipt_connlimit has been sitting in POM-NG for a long time. Here is a new shiny xt_connlimit with: * xtables'ified * will request the layer3 module (previously it hotdropped every packet when it was not loaded) * fixed: there was a deadlock in case of an OOM condition * support for any layer4 protocol (e.g. UDP/SCTP) * using jhash, as suggested by Eric Dumazet * ipv6 support Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/xt_connlimit.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 include/linux/netfilter/xt_connlimit.h (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h new file mode 100644 index 000000000000..90ae8b474cb8 --- /dev/null +++ b/include/linux/netfilter/xt_connlimit.h @@ -0,0 +1,17 @@ +#ifndef _XT_CONNLIMIT_H +#define _XT_CONNLIMIT_H + +struct xt_connlimit_data; + +struct xt_connlimit_info { + union { + u_int32_t v4_mask; + u_int32_t v6_mask[4]; + }; + unsigned int limit, inverse; + + /* this needs to be at the end */ + struct xt_connlimit_data *data __attribute__((aligned(8))); +}; + +#endif /* _XT_CONNLIMIT_H */ -- cgit v1.2.3 From 4381ca3c23b07ba5b567f72325003020ddca0341 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Jul 2007 21:00:11 +0100 Subject: fix return type of skb_checksum_complete() It returns __sum16, not unsigned int Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9391e4a4c344..ce256438e619 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1639,7 +1639,7 @@ static inline int skb_csum_unnecessary(const struct sk_buff *skb) * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the * hardware has already verified the correctness of the checksum. */ -static inline unsigned int skb_checksum_complete(struct sk_buff *skb) +static inline __sum16 skb_checksum_complete(struct sk_buff *skb) { return skb_csum_unnecessary(skb) ? 0 : __skb_checksum_complete(skb); -- cgit v1.2.3 From 22e03f3b58dfcca30f0c8de185022132459638d1 Mon Sep 17 00:00:00 2001 From: Raphael Assenat Date: Tue, 27 Feb 2007 19:49:53 +0000 Subject: leds: Add generic GPIO LED driver This patch adds support for GPIO connected leds via the new GPIO framework. Information about leds (gpio, polarity, name, default trigger) is passed to the driver via platform_data. Signed-off-by: Raphael Assenat Signed-off-by: Richard Purdie --- include/linux/leds.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 88afceffb7cb..059abfe219dc 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -110,4 +110,18 @@ extern void ledtrig_ide_activity(void); #define ledtrig_ide_activity() do {} while(0) #endif +/* For the leds-gpio driver */ +struct gpio_led { + const char *name; + char *default_trigger; + unsigned gpio; + u8 active_low; +}; + +struct gpio_led_platform_data { + int num_leds; + struct gpio_led *leds; +}; + + #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From f8a7c6fe14f556ca8eeddce258cb21392d0c3a2f Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Sun, 8 Jul 2007 23:19:31 +0100 Subject: leds: Convert from struct class_device to struct device Convert the LEDs class from struct class_device to struct device since class_device is scheduled for removal. Signed-off-by: Richard Purdie Acked-by: Greg Kroah-Hartman --- include/linux/leds.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 059abfe219dc..dc1178f6184b 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -16,7 +16,6 @@ #include struct device; -struct class_device; /* * LED Core */ @@ -38,7 +37,7 @@ struct led_classdev { void (*brightness_set)(struct led_classdev *led_cdev, enum led_brightness brightness); - struct class_device *class_dev; + struct device *dev; struct list_head node; /* LED Device list */ char *default_trigger; /* Trigger to use */ -- cgit v1.2.3 From 655bfd7aebb12481ab9275284d9500bee5ba3e70 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Mon, 9 Jul 2007 12:17:24 +0100 Subject: backlight: Convert from struct class_device to struct device Convert the backlight and LCD classes from struct class_device to struct device since class_device is scheduled for removal. One nasty API break is the backlight power attribute has had to be renamed to bl_power and the LCD power attribute has had to be renamed to lcd_power since the original names clash with the core. I can't see a way around this. Signed-off-by: Richard Purdie Acked-by: Greg Kroah-Hartman --- include/linux/backlight.h | 11 ++++++++--- include/linux/lcd.h | 14 ++++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1023ba0d6e55..c897c7b03858 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -69,8 +69,8 @@ struct backlight_device { /* The framebuffer notifier block */ struct notifier_block fb_notif; - /* The class device structure */ - struct class_device class_dev; + + struct device dev; }; static inline void backlight_update_status(struct backlight_device *bd) @@ -85,6 +85,11 @@ extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, struct backlight_ops *ops); extern void backlight_device_unregister(struct backlight_device *bd); -#define to_backlight_device(obj) container_of(obj, struct backlight_device, class_dev) +#define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) + +static inline void * bl_get_data(struct backlight_device *bl_dev) +{ + return dev_get_drvdata(&bl_dev->dev); +} #endif diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 598793c0745b..1d379787f2e7 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -62,8 +62,8 @@ struct lcd_device { struct mutex update_lock; /* The framebuffer notifier block */ struct notifier_block fb_notif; - /* The class device structure */ - struct class_device class_dev; + + struct device dev; }; static inline void lcd_set_power(struct lcd_device *ld, int power) @@ -75,9 +75,15 @@ static inline void lcd_set_power(struct lcd_device *ld, int power) } extern struct lcd_device *lcd_device_register(const char *name, - void *devdata, struct lcd_ops *ops); + struct device *parent, void *devdata, struct lcd_ops *ops); extern void lcd_device_unregister(struct lcd_device *ld); -#define to_lcd_device(obj) container_of(obj, struct lcd_device, class_dev) +#define to_lcd_device(obj) container_of(obj, struct lcd_device, dev) + +static inline void * lcd_get_data(struct lcd_device *ld_dev) +{ + return dev_get_drvdata(&ld_dev->dev); +} + #endif -- cgit v1.2.3 From 3d6392cfbd7dc11f23058e3493683afab4ac13a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jul 2007 12:38:05 +0200 Subject: bsg: support for full generic block layer SG v3 Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++++++ include/linux/bsg.h | 21 +++++++++++++++++++++ include/linux/genhd.h | 2 ++ 3 files changed, 35 insertions(+) create mode 100644 include/linux/bsg.h (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fae138bd2207..53002d40efa2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -41,6 +41,8 @@ struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; struct blk_trace; +struct request; +struct sg_io_hdr; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -607,6 +609,11 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_ANY ((u64)blk_max_pfn << PAGE_SHIFT) #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) +/* + * default timeout for SG_IO if none specified + */ +#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) + #ifdef CONFIG_MMU extern int init_emergency_isa_pool(void); extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); @@ -680,6 +687,11 @@ extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, struct request *, int, rq_end_io_fn *); +extern int blk_fill_sghdr_rq(request_queue_t *, struct request *, + struct sg_io_hdr *, int); +extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *); +extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *, + struct bio *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { diff --git a/include/linux/bsg.h b/include/linux/bsg.h new file mode 100644 index 000000000000..dc0d7282c4cb --- /dev/null +++ b/include/linux/bsg.h @@ -0,0 +1,21 @@ +#ifndef BSG_H +#define BSG_H + +#if defined(CONFIG_BLK_DEV_BSG) +struct bsg_class_device { + struct class_device *class_dev; + struct device *dev; + int minor; + struct gendisk *disk; + struct list_head list; +}; + +extern int bsg_register_disk(struct gendisk *); +extern void bsg_unregister_disk(struct gendisk *); +#else +struct bsg_class_device { }; +#define bsg_register_disk(disk) (0) +#define bsg_unregister_disk(disk) do { } while (0) +#endif + +#endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9756fc102a83..8c43d7032612 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,6 +67,7 @@ struct partition { #include #include #include +#include struct partition { unsigned char boot_ind; /* 0x80 - active */ @@ -91,6 +92,7 @@ struct hd_struct { #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif + struct bsg_class_device bsg_dev; }; #define GENHD_FL_REMOVABLE 1 -- cgit v1.2.3 From 337ad41deae1b56e56731246322a93251df86e79 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:18:54 +0100 Subject: block: export blk_verify_command for SG v4 blk_fill_sghdr_rq doesn't work for SG v4 so verify_command needed to be exported. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53002d40efa2..f6bc0d03ffad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,6 +692,7 @@ extern int blk_fill_sghdr_rq(request_queue_t *, struct request *, extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *); extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *, struct bio *); +extern int blk_verify_command(unsigned char *, int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 45977d0e87ac988d04fccfb89221727aaf8d78a4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 20 Dec 2006 11:19:32 +0100 Subject: bsg: add sg_io_v4 structure This patch adds sg_io_v4 structure that Doug proposed last month. There's one major change from the RFC. I dropped iovec, which needs compat stuff. The bsg code simply calls blk_rq_map_user against dout_xferp/din_xferp. So if possible, the page frames are directly mapped. If not possible, the block layer allocates new page frames and does memory copies. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index dc0d7282c4cb..0d212cc06abf 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,6 +1,47 @@ #ifndef BSG_H #define BSG_H +struct sg_io_v4 { + int32_t guard; /* [i] 'Q' to differentiate from v3 */ + uint32_t protocol; /* [i] 0 -> SCSI , .... */ + uint32_t subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + management function, .... */ + + uint32_t request_len; /* [i] in bytes */ + uint64_t request; /* [i], [*i] {SCSI: cdb} */ + uint32_t request_attr; /* [i] {SCSI: task attribute} */ + uint32_t request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + uint32_t request_priority; /* [i] {SCSI: task priority} */ + uint32_t max_response_len; /* [i] in bytes */ + uint64_t response; /* [i], [*o] {SCSI: (auto)sense data} */ + + /* "din_" for data in (from device); "dout_" for data out (to device) */ + uint32_t dout_xfer_len; /* [i] bytes to be transferred to device */ + uint32_t din_xfer_len; /* [i] bytes to be transferred from device */ + uint64_t dout_xferp; /* [i], [*i] */ + uint64_t din_xferp; /* [i], [*o] */ + + uint32_t timeout; /* [i] units: millisecond */ + uint32_t flags; /* [i] bit mask */ + uint64_t usr_ptr; /* [i->o] unused internally */ + uint32_t spare_in; /* [i] */ + + uint32_t driver_status; /* [o] 0 -> ok */ + uint32_t transport_status; /* [o] 0 -> ok */ + uint32_t device_status; /* [o] {SCSI: command completion status} */ + uint32_t retry_delay; /* [o] {SCSI: status auxiliary information} */ + uint32_t info; /* [o] additional information */ + uint32_t duration; /* [o] time to complete, in milliseconds */ + uint32_t response_len; /* [o] bytes of response actually written */ + int32_t din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + uint32_t generated_tag; /* [o] {SCSI: task tag that transport chose} */ + uint32_t spare_out; /* [o] */ + + uint32_t padding; +}; + +#ifdef __KERNEL__ + #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device { struct class_device *class_dev; @@ -18,4 +59,6 @@ struct bsg_class_device { }; #define bsg_unregister_disk(disk) do { } while (0) #endif +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From 1594a3f0eb526c73bc3915e8da13f2abf0ea1acd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2006 11:23:35 +0100 Subject: bsg: use u32 etc instead of uint32_t Signed-off-by: Jens Axboe --- include/linux/bsg.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0d212cc06abf..f968726cfadc 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -2,42 +2,42 @@ #define BSG_H struct sg_io_v4 { - int32_t guard; /* [i] 'Q' to differentiate from v3 */ - uint32_t protocol; /* [i] 0 -> SCSI , .... */ - uint32_t subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + s32 guard; /* [i] 'Q' to differentiate from v3 */ + u32 protocol; /* [i] 0 -> SCSI , .... */ + u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task management function, .... */ - uint32_t request_len; /* [i] in bytes */ - uint64_t request; /* [i], [*i] {SCSI: cdb} */ - uint32_t request_attr; /* [i] {SCSI: task attribute} */ - uint32_t request_tag; /* [i] {SCSI: task tag (only if flagged)} */ - uint32_t request_priority; /* [i] {SCSI: task priority} */ - uint32_t max_response_len; /* [i] in bytes */ - uint64_t response; /* [i], [*o] {SCSI: (auto)sense data} */ + u32 request_len; /* [i] in bytes */ + u64 request; /* [i], [*i] {SCSI: cdb} */ + u32 request_attr; /* [i] {SCSI: task attribute} */ + u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + u32 request_priority; /* [i] {SCSI: task priority} */ + u32 max_response_len; /* [i] in bytes */ + u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ /* "din_" for data in (from device); "dout_" for data out (to device) */ - uint32_t dout_xfer_len; /* [i] bytes to be transferred to device */ - uint32_t din_xfer_len; /* [i] bytes to be transferred from device */ - uint64_t dout_xferp; /* [i], [*i] */ - uint64_t din_xferp; /* [i], [*o] */ + u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + u32 din_xfer_len; /* [i] bytes to be transferred from device */ + u64 dout_xferp; /* [i], [*i] */ + u64 din_xferp; /* [i], [*o] */ - uint32_t timeout; /* [i] units: millisecond */ - uint32_t flags; /* [i] bit mask */ - uint64_t usr_ptr; /* [i->o] unused internally */ - uint32_t spare_in; /* [i] */ + u32 timeout; /* [i] units: millisecond */ + u32 flags; /* [i] bit mask */ + u64 usr_ptr; /* [i->o] unused internally */ + u32 spare_in; /* [i] */ - uint32_t driver_status; /* [o] 0 -> ok */ - uint32_t transport_status; /* [o] 0 -> ok */ - uint32_t device_status; /* [o] {SCSI: command completion status} */ - uint32_t retry_delay; /* [o] {SCSI: status auxiliary information} */ - uint32_t info; /* [o] additional information */ - uint32_t duration; /* [o] time to complete, in milliseconds */ - uint32_t response_len; /* [o] bytes of response actually written */ - int32_t din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - uint32_t generated_tag; /* [o] {SCSI: task tag that transport chose} */ - uint32_t spare_out; /* [o] */ + u32 driver_status; /* [o] 0 -> ok */ + u32 transport_status; /* [o] 0 -> ok */ + u32 device_status; /* [o] {SCSI: command completion status} */ + u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ + u32 info; /* [o] additional information */ + u32 duration; /* [o] time to complete, in milliseconds */ + u32 response_len; /* [o] bytes of response actually written */ + s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + u32 spare_out; /* [o] */ - uint32_t padding; + u32 padding; }; #ifdef __KERNEL__ -- cgit v1.2.3 From 3862153b673516b2efa0447b9b3778f47ac8f8c8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Dec 2006 09:43:51 +0100 Subject: Replace s32, u32 and u64 with __s32, __u32 and __u64 in bsg.h for userspace Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 58 ++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index f968726cfadc..2154a6dfbd53 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -2,42 +2,42 @@ #define BSG_H struct sg_io_v4 { - s32 guard; /* [i] 'Q' to differentiate from v3 */ - u32 protocol; /* [i] 0 -> SCSI , .... */ - u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task + __s32 guard; /* [i] 'Q' to differentiate from v3 */ + __u32 protocol; /* [i] 0 -> SCSI , .... */ + __u32 subprotocol; /* [i] 0 -> SCSI command, 1 -> SCSI task management function, .... */ - u32 request_len; /* [i] in bytes */ - u64 request; /* [i], [*i] {SCSI: cdb} */ - u32 request_attr; /* [i] {SCSI: task attribute} */ - u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ - u32 request_priority; /* [i] {SCSI: task priority} */ - u32 max_response_len; /* [i] in bytes */ - u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ + __u32 request_len; /* [i] in bytes */ + __u64 request; /* [i], [*i] {SCSI: cdb} */ + __u32 request_attr; /* [i] {SCSI: task attribute} */ + __u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ + __u32 request_priority; /* [i] {SCSI: task priority} */ + __u32 max_response_len; /* [i] in bytes */ + __u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ /* "din_" for data in (from device); "dout_" for data out (to device) */ - u32 dout_xfer_len; /* [i] bytes to be transferred to device */ - u32 din_xfer_len; /* [i] bytes to be transferred from device */ - u64 dout_xferp; /* [i], [*i] */ - u64 din_xferp; /* [i], [*o] */ + __u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + __u32 din_xfer_len; /* [i] bytes to be transferred from device */ + __u64 dout_xferp; /* [i], [*i] */ + __u64 din_xferp; /* [i], [*o] */ - u32 timeout; /* [i] units: millisecond */ - u32 flags; /* [i] bit mask */ - u64 usr_ptr; /* [i->o] unused internally */ - u32 spare_in; /* [i] */ + __u32 timeout; /* [i] units: millisecond */ + __u32 flags; /* [i] bit mask */ + __u64 usr_ptr; /* [i->o] unused internally */ + __u32 spare_in; /* [i] */ - u32 driver_status; /* [o] 0 -> ok */ - u32 transport_status; /* [o] 0 -> ok */ - u32 device_status; /* [o] {SCSI: command completion status} */ - u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ - u32 info; /* [o] additional information */ - u32 duration; /* [o] time to complete, in milliseconds */ - u32 response_len; /* [o] bytes of response actually written */ - s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ - u32 spare_out; /* [o] */ + __u32 driver_status; /* [o] 0 -> ok */ + __u32 transport_status; /* [o] 0 -> ok */ + __u32 device_status; /* [o] {SCSI: command completion status} */ + __u32 retry_delay; /* [o] {SCSI: status auxiliary information} */ + __u32 info; /* [o] additional information */ + __u32 duration; /* [o] time to complete, in milliseconds */ + __u32 response_len; /* [o] bytes of response actually written */ + __s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ + __u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + __u32 spare_out; /* [o] */ - u32 padding; + __u32 padding; }; #ifdef __KERNEL__ -- cgit v1.2.3 From 45e79a3acdcf54113b3d7b23e9e64e6541dbfeb5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 12:39:20 +0200 Subject: bsg: add a request_queue argument to scsi_cmd_ioctl() bsg uses scsi_cmd_ioctl() for some SCSI/sg ioctl commands. scsi_cmd_ioctl() gets a request queue from a gendisk arguement. This prevents bsg being bound to SCSI devices that don't have a gendisk (like OSD). This adds a request_queue argument to scsi_cmd_ioctl(). The SCSI/sg ioctl commands doesn't use a gendisk so it's safe for any SCSI devices to use scsi_cmd_ioctl(). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f6bc0d03ffad..2746632c2267 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -644,7 +644,8 @@ extern void blk_requeue_request(request_queue_t *, struct request *); extern void blk_plug_device(request_queue_t *); extern int blk_remove_plug(request_queue_t *); extern void blk_recount_segments(request_queue_t *, struct bio *); -extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); +extern int scsi_cmd_ioctl(struct file *, struct request_queue *, + struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); -- cgit v1.2.3 From d351af01b9307566135cb0f355ca65d0952c10b5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 12:40:35 +0200 Subject: bsg: bind bsg to request_queue instead of gendisk This patch binds bsg devices to request_queue instead of gendisk. Any objects (like transport entities) can define own request_handler and create own bsg device. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ include/linux/bsg.h | 10 +++++----- include/linux/genhd.h | 2 -- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2746632c2267..24b474e05a44 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -470,6 +471,10 @@ struct request_queue unsigned int bi_size; struct mutex sysfs_lock; + +#if defined(CONFIG_BLK_DEV_BSG) + struct bsg_class_device bsg_dev; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 2154a6dfbd53..0475a6d3ff6a 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -47,16 +47,16 @@ struct bsg_class_device { struct class_device *class_dev; struct device *dev; int minor; - struct gendisk *disk; struct list_head list; + struct request_queue *queue; }; -extern int bsg_register_disk(struct gendisk *); -extern void bsg_unregister_disk(struct gendisk *); +extern int bsg_register_queue(struct request_queue *, char *); +extern void bsg_unregister_queue(struct request_queue *); #else struct bsg_class_device { }; -#define bsg_register_disk(disk) (0) -#define bsg_unregister_disk(disk) do { } while (0) +#define bsg_register_queue(disk, name) (0) +#define bsg_unregister_queue(disk) do { } while (0) #endif #endif /* __KERNEL__ */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8c43d7032612..9756fc102a83 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,7 +67,6 @@ struct partition { #include #include #include -#include struct partition { unsigned char boot_ind; /* 0x80 - active */ @@ -92,7 +91,6 @@ struct hd_struct { #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif - struct bsg_class_device bsg_dev; }; #define GENHD_FL_REMOVABLE 1 -- cgit v1.2.3 From 4cf0723ac89b5f2189da2ad07ef875de26b83c77 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 30 Mar 2007 11:19:39 +0200 Subject: bsg: minor bug fixes This fixes the following minor issues: - add EXPORT_SYMBOL_GPL for bsg_register_queue and bsg_unregister_queue. - shut up gcc warnings Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0475a6d3ff6a..0dd01f90ba5e 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -51,7 +51,7 @@ struct bsg_class_device { struct request_queue *queue; }; -extern int bsg_register_queue(struct request_queue *, char *); +extern int bsg_register_queue(struct request_queue *, const char *); extern void bsg_unregister_queue(struct request_queue *); #else struct bsg_class_device { }; -- cgit v1.2.3 From abae1fde63fcdd2a3abaa0d7930938d8326f83d2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:14 +0200 Subject: add a struct request pointer to the request structure This adds a struct request pointer to the request structure for the second data phase (bidi for now). A request queue supporting bidi requests sets QUEUE_FLAG_BIDI. This prevents sending bidi requests to a non-bidi queue. Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24b474e05a44..b32564a1e105 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -317,6 +317,9 @@ struct request { */ rq_end_io_fn *end_io; void *end_io_data; + + /* for bidi */ + struct request *next_rq; }; /* @@ -486,6 +489,7 @@ struct request_queue #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ enum { /* @@ -550,6 +554,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -- cgit v1.2.3 From 15d10b611fa94b52f004a08a1d4cf7b39de3cba3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 16 Jul 2007 08:52:16 +0200 Subject: bsg: add SCSI transport-level request support This enables bsg to handle SCSI transport-level request like SAS management protocol (SMP). - add BSG_SUB_PROTOCOL_{SCSI_CMD, SCSI_TMF, SCSI_TRANSPORT} definitions. - SCSI transport-level requests skip blk_verify_command(). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/bsg.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 0dd01f90ba5e..bd998ca6cb2e 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -1,6 +1,12 @@ #ifndef BSG_H #define BSG_H +#define BSG_PROTOCOL_SCSI 0 + +#define BSG_SUB_PROTOCOL_SCSI_CMD 0 +#define BSG_SUB_PROTOCOL_SCSI_TMF 1 +#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2 + struct sg_io_v4 { __s32 guard; /* [i] 'Q' to differentiate from v3 */ __u32 protocol; /* [i] 0 -> SCSI , .... */ -- cgit v1.2.3 From d6d281684913dabb878e2f53219eed5df2cd867b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jun 2007 08:38:16 -0400 Subject: KVM: Remove kvmfs in favor of the anonymous inodes source kvm uses a pseudo filesystem, kvmfs, to generate inodes, a job that the new anonymous inodes source does much better. Cc: Davide Libenzi Signed-off-by: Avi Kivity --- include/linux/magic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index 9d713c03e3da..36cc20dfd142 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -13,7 +13,6 @@ #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -#define KVMFS_SUPER_MAGIC 0x19700426 #define ANON_INODE_FS_MAGIC 0x09041934 #define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ -- cgit v1.2.3 From db912f963909b3cbc3a059b7528f6a1a1eb6ffae Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 24 May 2007 12:23:10 +0300 Subject: HOTPLUG: Add CPU_DYING notifier KVM wants a notification when a cpu is about to die, so it can disable hardware extensions, but at a time when user processes cannot be scheduled on the cpu, so it doesn't try to use virtualization extensions after they have been disabled. This adds a CPU_DYING notification. The notification is called in atomic context on the doomed cpu. Signed-off-by: Avi Kivity --- include/linux/notifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 9431101bf876..576f2bb34cc8 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ +#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task, + * not handling interrupts, soon dead */ /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend * operation in progress @@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) +#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ -- cgit v1.2.3 From a52b1752c077cb919b71167c54968a0b91673281 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 9 Jul 2007 17:11:49 +0300 Subject: SMP: Allow smp_call_function_single() to current cpu This removes the requirement for callers to get_cpu() to check in simple cases. This patch is for !CONFIG_SMP. Cc: Andi Kleen Signed-off-by: Avi Kivity --- include/linux/smp.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 96ac21f8dd73..8039daced688 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,6 +7,7 @@ */ #include +#include extern void cpu_idle(void); @@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { } static inline int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int retry, int wait) { - return -EBUSY; + WARN_ON(cpuid != 0); + local_irq_disable(); + func(info); + local_irq_enable(); + return 0; } #endif /* !SMP */ -- cgit v1.2.3 From f2a11b158a24301e9158e9c873fa88e5eb775486 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Sun, 15 Jul 2007 23:37:21 -0700 Subject: LZO1X: fix lzo1x_worst_compress This is a correction for a macro which gives worst case compressed data size by LZO1X. This patch was provided by the LZO author (Markus Oberhumer). Signed-off-by: Nitin Gupta Cc: "Markus F.X.J. Oberhumer" Cc: "Richard Purdie" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lzo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lzo.h b/include/linux/lzo.h index 582d8b711a13..d793497ec1ca 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -17,7 +17,7 @@ #define LZO1X_MEM_COMPRESS (16384 * sizeof(unsigned char *)) #define LZO1X_1_MEM_COMPRESS LZO1X_MEM_COMPRESS -#define lzo1x_worst_compress(x) (x + (x / 64) + 16 + 3) +#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3) /* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */ int lzo1x_1_compress(const unsigned char *src, size_t src_len, -- cgit v1.2.3 From 96d7fa421e6424ad9ef6d1d039375dc2edb63fe8 Mon Sep 17 00:00:00 2001 From: Kristian Hoegsberg Date: Sun, 15 Jul 2007 23:37:24 -0700 Subject: lib: add idr_for_each() This patch adds an iterator function for the idr data structure. Compared to just iterating through the idr with an integer and idr_find, this iterator is (almost, but not quite) linear in the number of elements, as opposed to the number of integers in the range covered by the idr. This makes a difference for sparse idrs, but more importantly, it's a nicer way to iterate through the elements. The drm subsystem is moving to idr for tracking contexts and drawables, and with this change, we can use the idr exclusively for tracking these resources. [akpm@linux-foundation.org: fix comment] Signed-off-by: Kristian Hoegsberg Cc: Tejun Heo Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 915572fa030b..8442c0bffc06 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -78,6 +78,8 @@ void *idr_find(struct idr *idp, int id); int idr_pre_get(struct idr *idp, gfp_t gfp_mask); int idr_get_new(struct idr *idp, void *ptr, int *id); int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); +int idr_for_each(struct idr *idp, + int (*fn)(int id, void *p, void *data), void *data); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); void idr_destroy(struct idr *idp); -- cgit v1.2.3 From 23936cc0b5d89619c34c2dab11d8cf3d6f7ca028 Mon Sep 17 00:00:00 2001 From: Kristian Hoegsberg Date: Sun, 15 Jul 2007 23:37:24 -0700 Subject: lib: add idr_remove_all Remove all ids from the given idr tree. idr_destroy() only frees up unused, cached idp_layers, but this function will remove all id mappings and leave all idp_layers unused. A typical clean-up sequence for objects stored in an idr tree, will use idr_for_each() to free all objects, if necessay, then idr_remove_all() to remove all ids, and idr_destroy() to free up the cached idr_layers. Signed-off-by: Kristian Hoegsberg Cc: Tejun Heo Cc: Dave Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 8442c0bffc06..0edda411959c 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -82,6 +82,7 @@ int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data); void *idr_replace(struct idr *idp, void *ptr, int id); void idr_remove(struct idr *idp, int id); +void idr_remove_all(struct idr *idp); void idr_destroy(struct idr *idp); void idr_init(struct idr *idp); -- cgit v1.2.3 From 18a8bd949d6adb311ea816125ff65050df1f3f6e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Jul 2007 23:37:59 -0700 Subject: serial: convert early_uart to earlycon for 8250 Beacuse SERIAL_PORT_DFNS is removed from include/asm-i386/serial.h and include/asm-x86_64/serial.h. the serial8250_ports need to be probed late in serial initializing stage. the console_init=>serial8250_console_init=> register_console=>serial8250_console_setup will return -ENDEV, and console ttyS0 can not be enabled at that time. need to wait till uart_add_one_port in drivers/serial/serial_core.c to call register_console to get console ttyS0. that is too late. Make early_uart to use early_param, so uart console can be used earlier. Make it to be bootconsole with CON_BOOT flag, so can use console handover feature. and it will switch to corresponding normal serial console automatically. new command line will be: console=uart8250,io,0x3f8,9600n8 console=uart8250,mmio,0xff5e0000,115200n8 or earlycon=uart8250,io,0x3f8,9600n8 earlycon=uart8250,mmio,0xff5e0000,115200n8 it will print in very early stage: Early serial console at I/O port 0x3f8 (options '9600n8') console [uart0] enabled later for console it will print: console handover: boot [uart0] -> real [ttyS0] Signed-off-by: Cc: Andi Kleen Cc: Bjorn Helgaas Cc: Russell King Cc: Gerd Hoffmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/console.h | 2 ++ include/linux/serial.h | 6 ------ include/linux/serial_8250.h | 4 ++++ 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 62ef6e11d0d2..c44d3dfde7a5 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -99,6 +99,7 @@ struct console { struct tty_driver *(*device)(struct console *, int *); void (*unblank)(void); int (*setup)(struct console *, char *); + int (*early_setup)(void); short flags; short index; int cflag; @@ -107,6 +108,7 @@ struct console { }; extern int add_preferred_console(char *name, int idx, char *options); +extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); extern int unregister_console(struct console *); extern struct console *console_drivers; diff --git a/include/linux/serial.h b/include/linux/serial.h index 33fc8cb8ddfb..deb714314fb1 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -177,11 +177,5 @@ struct serial_icounter_struct { #ifdef __KERNEL__ #include -/* Allow architectures to override entries in serial8250_ports[] at run time: */ -struct uart_port; /* forward declaration */ -extern int early_serial_setup(struct uart_port *port); -extern int early_serial_console_init(char *options); -extern int serial8250_start_console(struct uart_port *port, char *options); - #endif /* __KERNEL__ */ #endif /* _LINUX_SERIAL_H */ diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 71310d80c09a..706ee9a4c80c 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -60,4 +60,8 @@ void serial8250_unregister_port(int line); void serial8250_suspend_port(int line); void serial8250_resume_port(int line); +extern int serial8250_find_port(struct uart_port *p); +extern int serial8250_find_port_for_earlycon(void); +extern int setup_early_serial8250_console(char *cmdline); + #endif -- cgit v1.2.3 From f0c0b2b808f232741eadac272bd4bc51f18df0f4 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Sun, 15 Jul 2007 23:38:01 -0700 Subject: change zonelist order: zonelist order selection logic Make zonelist creation policy selectable from sysctl/boot option v6. This patch makes NUMA's zonelist (of pgdat) order selectable. Available order are Default(automatic)/ Node-based / Zone-based. [Default Order] The kernel selects Node-based or Zone-based order automatically. [Node-based Order] This policy treats the locality of memory as the most important parameter. Zonelist order is created by each zone's locality. This means lower zones (ex. ZONE_DMA) can be used before higher zone (ex. ZONE_NORMAL) exhausion. IOW. ZONE_DMA will be in the middle of zonelist. current 2.6.21 kernel uses this. Pros. * A user can expect local memory as much as possible. Cons. * lower zone will be exhansted before higher zone. This may cause OOM_KILL. Maybe suitable if ZONE_DMA is relatively big and you never see OOM_KILL because of ZONE_DMA exhaution and you need the best locality. (example) assume 2 node NUMA. node(0) has ZONE_DMA/ZONE_NORMAL, node(1) has ZONE_NORMAL. *node(0)'s memory allocation order: node(0)'s NORMAL -> node(0)'s DMA -> node(1)'s NORMAL. *node(1)'s memory allocation order: node(1)'s NORMAL -> node(0)'s NORMAL -> node(0)'s DMA. [Zone-based order] This policy treats the zone type as the most important parameter. Zonelist order is created by zone-type order. This means lower zone never be used bofere higher zone exhaustion. IOW. ZONE_DMA will be always at the tail of zonelist. Pros. * OOM_KILL(bacause of lower zone) occurs only if the whole zones are exhausted. Cons. * memory locality may not be best. (example) assume 2 node NUMA. node(0) has ZONE_DMA/ZONE_NORMAL, node(1) has ZONE_NORMAL. *node(0)'s memory allocation order: node(0)'s NORMAL -> node(1)'s NORMAL -> node(0)'s DMA. *node(1)'s memory allocation order: node(1)'s NORMAL -> node(0)'s NORMAL -> node(0)'s DMA. bootoption "numa_zonelist_order=" and proc/sysctl is supporetd. command: %echo N > /proc/sys/vm/numa_zonelist_order Will rebuild zonelist in Node-based order. command: %echo Z > /proc/sys/vm/numa_zonelist_order Will rebuild zonelist in Zone-based order. Thanks to Lee Schermerhorn, he gives me much help and codes. [Lee.Schermerhorn@hp.com: add check_highest_zone to build_zonelists_in_zone_order] [akpm@linux-foundation.org: build fix] Signed-off-by: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Christoph Lameter Cc: Andi Kleen Cc: "jesse.barnes@intel.com" Signed-off-by: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d09b1345a3a1..04b1636a970b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -566,6 +566,11 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +extern int numa_zonelist_order_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); +extern char numa_zonelist_order[]; +#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ + #include /* Returns the number of the current Node. */ #ifndef numa_node_id -- cgit v1.2.3 From 698827fa9f45019df1609bb686bc51c94e127fbc Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:38:06 -0700 Subject: Remove the deprecated "kmem_cache_t" typedef from slab.h. Given that there is no remaining usage of the deprecated kmem_cache_t typedef anywhere in the tree, remove that typedef. Signed-off-by: Robert P. J. Day Acked-by: Pekka Enberg Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index cebcd3833c76..cd6ab658553f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -14,8 +14,6 @@ #include #include -typedef struct kmem_cache kmem_cache_t __deprecated; - /* * Flags to pass to kmem_cache_create(). * The ones marked DEBUG are only valid if CONFIG_SLAB_DEBUG is set. -- cgit v1.2.3 From fc9a07e7bf1a76e710f5df017abb07628db1781d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 15 Jul 2007 23:38:14 -0700 Subject: invalidate_mapping_pages(): add cond_resched invalidate_mapping_pages() can sometimes take a long time (millions of pages to free). Long enough for the softlockup detector to trigger. We used to have a cond_resched() in there but I took it out because the drop_caches code calls invalidate_mapping_pages() under inode_lock. The patch adds a nasty flag and puts the cond_resched() back. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f0b3bf5983c..51c938a71dec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1610,6 +1610,9 @@ extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); #endif extern int invalidate_inodes(struct super_block *); +unsigned long __invalidate_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end, + bool be_atomic); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -- cgit v1.2.3 From 8f0accc8627043702e6ea2bb8b9aa3a171ef8393 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 15 Jul 2007 23:38:19 -0700 Subject: kill vmalloc_earlyreserve This symbol got orphaned quite a while ago. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1c1207472bb4..bbd427e8741a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,7 +27,6 @@ extern unsigned long max_mapnr; extern unsigned long num_physpages; extern void * high_memory; -extern unsigned long vmalloc_earlyreserve; extern int page_cluster; #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From 6193a2ff180920f84ee06977165ebf32431fc2d2 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 15 Jul 2007 23:38:22 -0700 Subject: slob: initial NUMA support This adds preliminary NUMA support to SLOB, primarily aimed at systems with small nodes (tested all the way down to a 128kB SRAM block), whether asymmetric or otherwise. We follow the same conventions as SLAB/SLUB, preferring current node placement for new pages, or with explicit placement, if a node has been specified. Presently on UP NUMA this has the side-effect of preferring node#0 allocations (since numa_node_id() == 0, though this could be reworked if we could hand off a pfn to determine node placement), so single-CPU NUMA systems will want to place smaller nodes further out in terms of node id. Once a page has been bound to a node (via explicit node id typing), we only do block allocations from partial free pages that have a matching node id in the page flags. The current implementation does have some scalability problems, in that all partial free pages are tracked in the global freelist (with contention due to the single spinlock). However, these are things that are being reworked for SMP scalability first, while things like per-node freelists can easily be built on top of this sort of functionality once it's been added. More background can be found in: http://marc.info/?l=linux-mm&m=118117916022379&w=2 http://marc.info/?l=linux-mm&m=118170446306199&w=2 http://marc.info/?l=linux-mm&m=118187859420048&w=2 and subsequent threads. Acked-by: Christoph Lameter Acked-by: Matt Mackall Signed-off-by: Paul Mundt Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 126 +++++++++++++++++++++++------------------------ include/linux/slab_def.h | 4 ++ include/linux/slob_def.h | 46 +++++++++++++++++ include/linux/slub_def.h | 6 ++- 4 files changed, 117 insertions(+), 65 deletions(-) create mode 100644 include/linux/slob_def.h (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index cd6ab658553f..27402fea9b79 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -42,7 +42,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void (*)(void *, struct kmem_cache *, unsigned long)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); void kmem_cache_free(struct kmem_cache *, void *); unsigned int kmem_cache_size(struct kmem_cache *); @@ -61,16 +60,6 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL, NULL) -#ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -#else -static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, - gfp_t flags, int node) -{ - return kmem_cache_alloc(cachep, flags); -} -#endif - /* * The largest kmalloc size supported by the slab allocators is * 32 megabyte (2^25) or the maximum allocatable page order if that is @@ -89,7 +78,6 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, /* * Common kmalloc functions provided by all allocators */ -void *__kmalloc(size_t, gfp_t); void *__kzalloc(size_t, gfp_t); void * __must_check krealloc(const void *, size_t, gfp_t); void kfree(const void *); @@ -100,40 +88,6 @@ size_t ksize(const void *); * @n: number of elements. * @size: element size. * @flags: the type of memory to allocate. - */ -static inline void *kcalloc(size_t n, size_t size, gfp_t flags) -{ - if (n != 0 && size > ULONG_MAX / n) - return NULL; - return __kzalloc(n * size, flags); -} - -/* - * Allocator specific definitions. These are mainly used to establish optimized - * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by selecting - * the appropriate general cache at compile time. - */ - -#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) -#ifdef CONFIG_SLUB -#include -#else -#include -#endif /* !CONFIG_SLUB */ -#else - -/* - * Fallback definitions for an allocator not wanting to provide - * its own optimized kmalloc definitions (like SLOB). - */ - -/** - * kmalloc - allocate memory - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. - * - * kmalloc is the normal method of allocating memory - * in the kernel. * * The @flags argument may be one of: * @@ -141,7 +95,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * * %GFP_KERNEL - Allocate normal kernel ram. May sleep. * - * %GFP_ATOMIC - Allocation will not sleep. + * %GFP_ATOMIC - Allocation will not sleep. May use emergency pools. * For example, use this inside interrupt handlers. * * %GFP_HIGHUSER - Allocate pages from high memory. @@ -150,18 +104,22 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * * %GFP_NOFS - Do not make any fs calls while trying to get memory. * + * %GFP_NOWAIT - Allocation will not sleep. + * + * %GFP_THISNODE - Allocate node-local memory only. + * + * %GFP_DMA - Allocation suitable for DMA. + * Should only be used for kmalloc() caches. Otherwise, use a + * slab created with SLAB_DMA. + * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * * %__GFP_COLD - Request cache-cold pages instead of * trying to return cache-warm pages. * - * %__GFP_DMA - Request memory from the DMA-capable zone. - * * %__GFP_HIGH - This allocation has high priority and may use emergency pools. * - * %__GFP_HIGHMEM - Allocated memory may be from highmem. - * * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail * (think twice before using). * @@ -171,24 +129,57 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * %__GFP_NOWARN - If allocation fails, don't issue any warnings. * * %__GFP_REPEAT - If allocation fails initially, try once more before failing. + * + * There are other flags available as well, but these are not intended + * for general use, and so are not documented here. For a full list of + * potential flags, always refer to linux/gfp.h. */ -static inline void *kmalloc(size_t size, gfp_t flags) +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { - return __kmalloc(size, flags); + if (n != 0 && size > ULONG_MAX / n) + return NULL; + return __kzalloc(n * size, flags); } -/** - * kzalloc - allocate memory. The memory is set to zero. - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kmalloc). +/* + * Allocator specific definitions. These are mainly used to establish optimized + * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by + * selecting the appropriate general cache at compile time. + * + * Allocators must define at least: + * + * kmem_cache_alloc() + * __kmalloc() + * kmalloc() + * kzalloc() + * + * Those wishing to support NUMA must also define: + * + * kmem_cache_alloc_node() + * kmalloc_node() + * + * See each allocator definition file for additional comments and + * implementation notes. */ -static inline void *kzalloc(size_t size, gfp_t flags) -{ - return __kzalloc(size, flags); -} +#ifdef CONFIG_SLUB +#include +#elif defined(CONFIG_SLOB) +#include +#else +#include #endif -#ifndef CONFIG_NUMA +#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB) +/** + * kmalloc_node - allocate memory from a specific node + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate (see kcalloc). + * @node: node to allocate from. + * + * kmalloc() for non-local nodes, used to allocate from a specific node + * if available. Equivalent to kmalloc() in the non-NUMA single-node + * case. + */ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); @@ -198,7 +189,15 @@ static inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc(size, flags); } -#endif /* !CONFIG_NUMA */ + +void *kmem_cache_alloc(struct kmem_cache *, gfp_t); + +static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, + gfp_t flags, int node) +{ + return kmem_cache_alloc(cachep, flags); +} +#endif /* !CONFIG_NUMA && !CONFIG_SLOB */ /* * kmalloc_track_caller is a special version of kmalloc that records the @@ -245,4 +244,3 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ - diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8d81a60518e4..365d036c454a 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -25,6 +25,9 @@ struct cache_sizes { }; extern struct cache_sizes malloc_sizes[]; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t); +void *__kmalloc(size_t size, gfp_t flags); + static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { @@ -79,6 +82,7 @@ found: #ifdef CONFIG_NUMA extern void *__kmalloc_node(size_t size, gfp_t flags, int node); +extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h new file mode 100644 index 000000000000..a2daf2d418a9 --- /dev/null +++ b/include/linux/slob_def.h @@ -0,0 +1,46 @@ +#ifndef __LINUX_SLOB_DEF_H +#define __LINUX_SLOB_DEF_H + +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); + +static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) +{ + return kmem_cache_alloc_node(cachep, flags, -1); +} + +void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __kmalloc_node(size, flags, node); +} + +/** + * kmalloc - allocate memory + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate (see kcalloc). + * + * kmalloc is the normal method of allocating memory + * in the kernel. + */ +static inline void *kmalloc(size_t size, gfp_t flags) +{ + return __kmalloc_node(size, flags, -1); +} + +static inline void *__kmalloc(size_t size, gfp_t flags) +{ + return kmalloc(size, flags); +} + +/** + * kzalloc - allocate memory. The memory is set to zero. + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate (see kcalloc). + */ +static inline void *kzalloc(size_t size, gfp_t flags) +{ + return __kzalloc(size, flags); +} + +#endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 6207a3d8da71..a582f6771525 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -171,6 +171,9 @@ static inline struct kmem_cache *kmalloc_slab(size_t size) #define ZERO_SIZE_PTR ((void *)16) +void *kmem_cache_alloc(struct kmem_cache *, gfp_t); +void *__kmalloc(size_t size, gfp_t flags); + static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { @@ -198,7 +201,8 @@ static inline void *kzalloc(size_t size, gfp_t flags) } #ifdef CONFIG_NUMA -extern void *__kmalloc_node(size_t size, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node); +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { -- cgit v1.2.3 From 0165ab443556bdfad388da6c33d74a71b77d72b2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 15 Jul 2007 23:38:26 -0700 Subject: split mmap This is a straightforward split of do_mmap_pgoff() into two functions: - do_mmap_pgoff() checks the parameters, and calculates the vma flags. Then it calls - mmap_region(), which does the actual mapping Signed-off-by: Miklos Szeredi Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bbd427e8741a..d4ab4e590e23 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1071,6 +1071,10 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff); +extern unsigned long mmap_region(struct file *file, unsigned long addr, + unsigned long len, unsigned long flags, + unsigned int vm_flags, unsigned long pgoff, + int accountable); static inline unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, -- cgit v1.2.3 From 786d7e1612f0b0adb6046f19b906609e4fe8b1ba Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 15 Jul 2007 23:39:00 -0700 Subject: Fix rmmod/read/write races in /proc entries Fix following races: =========================================== 1. Write via ->write_proc sleeps in copy_from_user(). Module disappears meanwhile. Or, more generically, system call done on /proc file, method supplied by module is called, module dissapeares meanwhile. pde = create_proc_entry() if (!pde) return -ENOMEM; pde->write_proc = ... open write copy_from_user pde = create_proc_entry(); if (!pde) { remove_proc_entry(); return -ENOMEM; /* module unloaded */ } *boom* ========================================== 2. bogo-revoke aka proc_kill_inodes() remove_proc_entry vfs_read proc_kill_inodes [check ->f_op validness] [check ->f_op->read validness] [verify_area, security permissions checks] ->f_op = NULL; if (file->f_op->read) /* ->f_op dereference, boom */ NOTE, NOTE, NOTE: file_operations are proxied for regular files only. Let's see how this scheme behaves, then extend if needed for directories. Directories creators in /proc only set ->owner for them, so proxying for directories may be unneeded. NOTE, NOTE, NOTE: methods being proxied are ->llseek, ->read, ->write, ->poll, ->unlocked_ioctl, ->ioctl, ->compat_ioctl, ->open, ->release. If your in-tree module uses something else, yell on me. Full audit pending. [akpm@linux-foundation.org: build fix] Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3469f96bc8b2..28e3664fdf1b 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -7,6 +7,8 @@ #include #include +struct completion; + /* * The proc filesystem constants/structures */ @@ -56,6 +58,14 @@ struct proc_dir_entry { gid_t gid; loff_t size; const struct inode_operations *proc_iops; + /* + * NULL ->proc_fops means "PDE is going away RSN" or + * "PDE is just created". In either case, e.g. ->read_proc won't be + * called because it's too late or too early, respectively. + * + * If you're allocating ->proc_fops dynamically, save a pointer + * somewhere. + */ const struct file_operations *proc_fops; get_info_t *get_info; struct module *owner; @@ -66,6 +76,9 @@ struct proc_dir_entry { atomic_t count; /* use count */ int deleted; /* delete flag */ void *set; + int pde_users; /* number of callers into module in progress */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + struct completion *pde_unload_completion; }; struct kcore_list { -- cgit v1.2.3 From e1f4a88c5a15a86124a95ea712213bb7dab2ad99 Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Sun, 15 Jul 2007 23:39:24 -0700 Subject: introduce write_trylock_irqsave() Introduce a write_trylock_irqsave() implementation. Similar in style to the implementation of spin_trylock_irqsave() in mainline. Signed-off-by: Satyam Sharma Cc: Sripathi Kodi Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a946176db638..c376f3b36c89 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -282,6 +282,13 @@ do { \ 1 : ({ local_irq_restore(flags); 0; }); \ }) +#define write_trylock_irqsave(lock, flags) \ +({ \ + local_irq_save(flags); \ + write_trylock(lock) ? \ + 1 : ({ local_irq_restore(flags); 0; }); \ +}) + /* * Locks two spinlocks l1 and l2. * l1_first indicates if spinlock l1 should be taken first. -- cgit v1.2.3 From ed4aaadb1a7913f509f05d3e67840541a180713f Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Sun, 15 Jul 2007 23:39:39 -0700 Subject: fix jvc cdrom drive lockup Before calling init_hwif_default, ide_unregister gets lock ide_lock and disables irq. init_hwif_default calls ide_default_io_base which calls pci_get_device and later pci_get_subsys tries to apply for semaphore pci_bus_sem and goes to sleep. Mostly, pci_get_device should be called when irq is turned on. ide_default_io_base just needs find if list pci_devices is empty. Signed-off-by: Zhang Yanmin Cc: Greg KH Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 37a71580ad8a..5e84f2e8d54c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -432,6 +432,8 @@ extern struct bus_type pci_bus_type; * code, or pci core code. */ extern struct list_head pci_root_buses; /* list of all known PCI buses */ extern struct list_head pci_devices; /* list of all devices */ +/* Some device drivers need know if pci is initiated */ +extern int no_pci_devices(void); void pcibios_fixup_bus(struct pci_bus *); int __must_check pcibios_enable_device(struct pci_dev *, int mask); @@ -724,6 +726,7 @@ static inline struct pci_dev *pci_get_class(unsigned int class, struct pci_dev * { return NULL; } #define pci_dev_present(ids) (0) +#define no_pci_devices() (1) #define pci_find_present(ids) (NULL) #define pci_dev_put(dev) do { } while (0) -- cgit v1.2.3 From 7c3f1a573237b90ef331267260358a0ec4ac9079 Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Sun, 15 Jul 2007 23:39:41 -0700 Subject: Introduce boot based time The commits 411187fb05cd11676b0979d9fbf3291db69dbce2 (GTOD: persistent clock support) c1d370e167d66b10bca3b602d3740405469383de (i386: use GTOD persistent clock support) changed the monotonic time so that it no longer jumps after resume, but it's not possible to use it for boot time and process start time calculations then. Also, the uptime no longer increases during suspend. I add a variable to track the wall_to_monotonic changes, a function to get the real boot time and a function to get the boot based time from the monotonic one. [akpm@linux-foundation.org: remove exports, add comment] Signed-off-by: Tomas Janousek Cc: Tomas Smetana Cc: John Stultz Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index dda9be685ab6..4bb05a829be9 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -116,6 +116,8 @@ extern int do_setitimer(int which, struct itimerval *value, extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); +extern void getboottime(struct timespec *ts); +extern void monotonic_to_bootbased(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_is_continuous(void); -- cgit v1.2.3 From 924b42d5a2dbe508407a0a6290d3751f826bccdd Mon Sep 17 00:00:00 2001 From: Tomas Janousek Date: Sun, 15 Jul 2007 23:39:42 -0700 Subject: Use boot based time for process start time and boot time in /proc Commit 411187fb05cd11676b0979d9fbf3291db69dbce2 caused boot time to move and process start times to become invalid after suspend. Using boot based time for those restores the old behaviour and fixes the issue. [akpm@linux-foundation.org: little cleanup] Signed-off-by: Tomas Janousek Cc: Tomas Smetana Acked-by: John Stultz Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cfb680585ab8..3cffc1204663 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -972,7 +972,8 @@ struct task_struct { unsigned int rt_priority; cputime_t utime, stime; unsigned long nvcsw, nivcsw; /* context switch counts */ - struct timespec start_time; + struct timespec start_time; /* monotonic time */ + struct timespec real_start_time; /* boot based time */ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; -- cgit v1.2.3 From 9c1729db3e6d738f872bcb090212af00473bf666 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sun, 15 Jul 2007 23:39:43 -0700 Subject: Prevent an O_NDELAY writer from blocking when a tty write is blocked by the tty atomic writer mutex Without this a tty write could block if a previous blocking tty write was in progress on the same tty and blocked by a line discipline or hardware event. Originally found and reported by Dave Johnson. Signed-off-by: Alan Cox Acked-by: Dave Johnson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index bb4576085203..deaba9ec5004 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -338,6 +338,12 @@ extern struct tty_struct *get_current_tty(void); extern struct mutex tty_mutex; +extern void tty_write_unlock(struct tty_struct *tty); +extern int tty_write_lock(struct tty_struct *tty, int ndelay); +#define tty_is_writelocked(tty) (mutex_is_locked(&tty->atomic_write_lock)) + + + /* n_tty.c */ extern struct tty_ldisc tty_ldisc_N_TTY; -- cgit v1.2.3 From 9ac162521cd9796f44d263a61090634844c719a6 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Sun, 15 Jul 2007 23:39:49 -0700 Subject: Use mutexes instead of semaphores in I2O driver The I2O driver uses two semaphores as mutexes. Use the mutex API instead of the (binary) semaphores. Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2o.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 52f53e2e70c3..333a370a3bdc 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -31,6 +31,7 @@ #include #include /* work_struct */ #include +#include #include #include /* Needed for MUTEX init macros */ @@ -425,7 +426,7 @@ struct i2o_device { struct device device; - struct semaphore lock; /* device lock */ + struct mutex lock; /* device lock */ }; /* @@ -544,7 +545,7 @@ struct i2o_controller { struct i2o_dma hrt; /* HW Resource Table */ i2o_lct *lct; /* Logical Config Table */ struct i2o_dma dlct; /* Temp LCT */ - struct semaphore lct_lock; /* Lock for LCT updates */ + struct mutex lct_lock; /* Lock for LCT updates */ struct i2o_dma status_block; /* IOP status block */ struct i2o_io base; /* controller messaging unit */ -- cgit v1.2.3 From 21f3da95daed2d0f0c28cc4ef8b1103fbfb7bded Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 15 Jul 2007 23:39:50 -0700 Subject: fuse warning fix gcc-4.3: fs/fuse/dir.c: In function 'parse_dirfile': fs/fuse/dir.c:833: warning: cast from pointer to integer of different size fs/fuse/dir.c:835: warning: cast from pointer to integer of different size [miklos@szeredi.hu: use offsetof] Acked-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fuse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 534744efe30d..9fbe9d258e22 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -339,7 +339,7 @@ struct fuse_dirent { char name[0]; }; -#define FUSE_NAME_OFFSET ((unsigned) ((struct fuse_dirent *) 0)->name) +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) -- cgit v1.2.3 From c67ad917cbf21b2862e2cf8e8b28339872ef7927 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 15 Jul 2007 23:39:51 -0700 Subject: percpu_counters(): use cpu notifiers per-cpu counters presently must iterate over all possible CPUs in the exhaustive percpu_counter_sum(). But it can be much better to only iterate over the presently-online CPUs. To do this, we must arrange for an offlined CPU's count to be spilled into the counter's central count. We can do this for all percpu_counters in the machine by linking them into a single global list and walking that list at CPU_DEAD time. (I hope. Might have race windows in which the percpu_counter_sum() count is inaccurate?) Cc: Gautham R Shenoy Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index f5aa593ccf32..3d9f70972cdf 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,9 @@ struct percpu_counter { spinlock_t lock; s64 count; +#ifdef CONFIG_HOTPLUG_CPU + struct list_head list; /* All percpu_counters are on a list */ +#endif s32 *counters; }; @@ -26,18 +30,8 @@ struct percpu_counter { #define FBC_BATCH (NR_CPUS*4) #endif -static inline void percpu_counter_init(struct percpu_counter *fbc, s64 amount) -{ - spin_lock_init(&fbc->lock); - fbc->count = amount; - fbc->counters = alloc_percpu(s32); -} - -static inline void percpu_counter_destroy(struct percpu_counter *fbc) -{ - free_percpu(fbc->counters); -} - +void percpu_counter_init(struct percpu_counter *fbc, s64 amount); +void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_mod(struct percpu_counter *fbc, s32 amount); s64 percpu_counter_sum(struct percpu_counter *fbc); -- cgit v1.2.3 From 9aacd599342fdfc1fb9422f37e900609b7a46249 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 15 Jul 2007 23:39:56 -0700 Subject: fat: gcc 4.3 warning fix This patch fixes the following warnings. fs/fat/dir.c: In function 'fat_parse_long': include/linux/msdos_fs.h:294: warning: array subscript is above array bounds include/linux/msdos_fs.h:295: warning: array subscript is above array bounds include/linux/msdos_fs.h:295: warning: array subscript is above array bounds The ->name is defined as "name[8], ext[3]", but fat_checksum() uses those as name[11]. There is no actual problem, but it's not a good manner. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0e09c005dda8..f950921523f5 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -146,7 +146,7 @@ struct fat_boot_fsinfo { }; struct msdos_dir_entry { - __u8 name[8],ext[3]; /* name and extension */ + __u8 name[MSDOS_NAME];/* name and extension */ __u8 attr; /* attribute bits */ __u8 lcase; /* Case for base and extension */ __u8 ctime_cs; /* Creation time, centiseconds (0-199) */ -- cgit v1.2.3 From 0a3021f4e249fbdb5f30d614707b5e02022e4c9b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:39:57 -0700 Subject: Remove unnecessary includes of spinlock.h under include/linux Remove the obviously unnecessary includes of under the include/linux/ directory, and fix the couple errors that are introduced as a result of that. Signed-off-by: Robert P. J. Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/attribute_container.h | 1 - include/linux/capability.h | 1 - include/linux/console.h | 1 - include/linux/ds17287rtc.h | 1 - include/linux/ipc.h | 1 + include/linux/leds.h | 1 - include/linux/module.h | 1 - include/linux/percpu.h | 2 +- include/linux/scx200_gpio.h | 2 -- include/linux/signal.h | 1 - include/linux/smp_lock.h | 1 - include/linux/timer.h | 1 - 12 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index 93bfb0beb62a..8ff274933948 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -12,7 +12,6 @@ #include #include #include -#include struct attribute_container { struct list_head node; diff --git a/include/linux/capability.h b/include/linux/capability.h index bbf8df7de28f..2dfa58555934 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -44,7 +44,6 @@ typedef struct __user_cap_data_struct { #ifdef __KERNEL__ -#include #include /* #define STRICT_CAP_T_TYPECHECKS */ diff --git a/include/linux/console.h b/include/linux/console.h index c44d3dfde7a5..56a7bcda49cb 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -15,7 +15,6 @@ #define _LINUX_CONSOLE_H_ 1 #include -#include struct vc_data; struct console_font_op; diff --git a/include/linux/ds17287rtc.h b/include/linux/ds17287rtc.h index c281ba42e28f..d85d3f497b96 100644 --- a/include/linux/ds17287rtc.h +++ b/include/linux/ds17287rtc.h @@ -11,7 +11,6 @@ #define __LINUX_DS17287RTC_H #include /* get the user-level API */ -#include /* spinlock_t */ #include /* Register A */ diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 1980867a64a4..7c8c6d8d090c 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -52,6 +52,7 @@ struct ipc_perm #ifdef __KERNEL__ #include +#include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ diff --git a/include/linux/leds.h b/include/linux/leds.h index 88afceffb7cb..494bed7c2fc1 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,7 +13,6 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include struct device; struct class_device; diff --git a/include/linux/module.h b/include/linux/module.h index e6e0f86ef5fc..b6a646cea1cb 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -6,7 +6,6 @@ * Rewritten by Richard Henderson Dec 1996 * Rewritten again by Rusty Russell, 2002 */ -#include #include #include #include diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b72be2f79e6a..926adaae0f96 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,7 +1,7 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H -#include /* For preempt_disable() */ +#include #include /* For kmalloc() */ #include #include /* For memset() */ diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h index 1a82d30c4b17..d2b058130eb1 100644 --- a/include/linux/scx200_gpio.h +++ b/include/linux/scx200_gpio.h @@ -1,5 +1,3 @@ -#include - u32 scx200_gpio_configure(unsigned index, u32 set, u32 clear); extern unsigned scx200_gpio_base; diff --git a/include/linux/signal.h b/include/linux/signal.h index 9a5eac508e5e..ea91abe740da 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -6,7 +6,6 @@ #ifdef __KERNEL__ #include -#include /* * Real Time signals may be queued. diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index cf715a40d833..58962c51dee1 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,7 +3,6 @@ #ifdef CONFIG_LOCK_KERNEL #include -#include #define kernel_locked() (current->lock_depth >= 0) diff --git a/include/linux/timer.h b/include/linux/timer.h index c661710d3627..2b59e6d4219c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -3,7 +3,6 @@ #include #include -#include #include struct tvec_t_base_s; -- cgit v1.2.3 From e8d6c554126b830217c5e9f549e0e21f865a0a8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 15 Jul 2007 23:40:12 -0700 Subject: AFS: implement file locking Implement file locking for AFS. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51c938a71dec..aa4530c1ff7a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -820,6 +820,10 @@ struct file_lock { union { struct nfs_lock_info nfs_fl; struct nfs4_lock_info nfs4_fl; + struct { + struct list_head link; /* link in AFS vnode's pending_locks list */ + int state; /* state of grant or error if -ve */ + } afs; } fl_u; }; -- cgit v1.2.3 From 9e7bf24b1b979db256ddc84d0d4ac6040d706da6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 15 Jul 2007 23:40:25 -0700 Subject: fs: clarify "dummy" member in struct inodes_stat_t Signed-off-by: Stefan Richter Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index aa4530c1ff7a..e68780810279 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -44,7 +44,7 @@ extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; int nr_unused; - int dummy[5]; + int dummy[5]; /* padding for sysctl ABI compatibility */ }; extern struct inodes_stat_t inodes_stat; -- cgit v1.2.3 From 1b0fac45878bb88759eec347c273285195649ff7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 15 Jul 2007 23:40:26 -0700 Subject: dma-mapping: prevent dma dependent code from linking on !HAS_DMA archs Continuing the work started in 411f0f3edc141a582190d3605cadd1d993abb6df ... This enables code with a dma path, that compiles away, to build without requiring additional code factoring. It also prevents code that calls dma_alloc_coherent and dma_free_coherent from linking whereas previously the code would hit a BUG() at run time. Finally, it allows archs that set !HAS_DMA to delete their asm/dma-mapping.h file. Cc: Cornelia Huck Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: John W. Linville Cc: Kyle McMartin Cc: James Bottomley Cc: Tejun Heo Cc: Jeff Garzik Cc: Cc: Cc: Cc: Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9a663c6db16a..2dc21cbeb304 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -31,7 +31,11 @@ static inline int valid_dma_direction(int dma_direction) (dma_direction == DMA_FROM_DEVICE)); } +#ifdef CONFIG_HAS_DMA #include +#else +#include +#endif /* Backwards compat, remove in 2.7.x */ #define dma_sync_single dma_sync_single_for_cpu -- cgit v1.2.3 From e0807061908a7a9441d0f745deb444f7216904cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 15 Jul 2007 23:40:30 -0700 Subject: remove odd and misleading comments from uio.h Signed-off-by: Christoph Hellwig Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uio.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 9af8bbcd8963..b7fe13883bdb 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -13,10 +13,6 @@ * 2 of the License, or (at your option) any later version. */ - -/* A word of warning: Our uio structure will clash with the C library one (which is now obsolete). Remove the C - library one from sys/uio.h if you have a very old library set */ - struct iovec { void __user *iov_base; /* BSD uses caddr_t (1003.1g requires void *) */ @@ -38,11 +34,6 @@ struct kvec { #define UIO_FASTIOV 8 #define UIO_MAXIOV 1024 -#if 0 -#define UIO_MAXIOV 16 /* Maximum iovec's in one operation - 16 matches BSD */ - /* Beg pardon: BSD has 1024 --ANK */ -#endif /* * Total number of bytes covered by an iovec. -- cgit v1.2.3 From c5c061b8f9726bc2c25e19dec227933a13d1e6b7 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Sun, 15 Jul 2007 23:40:30 -0700 Subject: Add a flag to indicate deferrable timers in /proc/timer_stats Add a flag in /proc/timer_stats to indicate deferrable timers. This will let developers/users to differentiate between types of tiemrs in /proc/timer_stats. Deferrable timer and normal timer will appear in /proc/timer_stats as below. 10D, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 10, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) Also version of timer_stats changes from v0.1 to v0.2 Signed-off-by: Venkatesh Pallipadi Acked-by: Ingo Molnar Cc: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 5 +++-- include/linux/timer.h | 15 ++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 17c29dca8354..540799bc85f8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -329,12 +329,13 @@ extern void sysrq_timer_list_show(void); #ifdef CONFIG_TIMER_STATS extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char * comm); + void *timerf, char *comm, + unsigned int timer_flag); static inline void timer_stats_account_hrtimer(struct hrtimer *timer) { timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm); + timer->function, timer->start_comm, 0); } extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, diff --git a/include/linux/timer.h b/include/linux/timer.h index 2b59e6d4219c..78cf899b4409 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -90,16 +90,13 @@ extern unsigned long get_next_timer_interrupt(unsigned long now); */ #ifdef CONFIG_TIMER_STATS +#define TIMER_STATS_FLAG_DEFERRABLE 0x1 + extern void init_timer_stats(void); extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char * comm); - -static inline void timer_stats_account_timer(struct timer_list *timer) -{ - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm); -} + void *timerf, char *comm, + unsigned int timer_flag); extern void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr); @@ -118,10 +115,6 @@ static inline void init_timer_stats(void) { } -static inline void timer_stats_account_timer(struct timer_list *timer) -{ -} - static inline void timer_stats_timer_set_start_info(struct timer_list *timer) { } -- cgit v1.2.3 From 4a19542e5f694cd408a32c3d9dc593ba9366e2d7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 15 Jul 2007 23:40:34 -0700 Subject: O_CLOEXEC for SCM_RIGHTS Part two in the O_CLOEXEC saga: adding support for file descriptors received through Unix domain sockets. The patch is once again pretty minimal, it introduces a new flag for recvmsg and passes it just like the existing MSG_CMSG_COMPAT flag. I think this bit is not used otherwise but the networking people will know better. This new flag is not recognized by recvfrom and recv. These functions cannot be used for that purpose and the asymmetry this introduces is not worse than the already existing MSG_CMSG_COMPAT situations. The patch must be applied on the patch which introduced O_CLOEXEC. It has to remove static from the new get_unused_fd_flags function but since scm.c cannot live in a module the function still hasn't to be exported. Here's a test program to make sure the code works. It's so much longer than the actual patch... #include #include #include #include #include #include #include #include #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 #endif #ifndef MSG_CMSG_CLOEXEC # define MSG_CMSG_CLOEXEC 0x40000000 #endif int main (int argc, char *argv[]) { if (argc > 1) { int fd = atol (argv[1]); printf ("child: fd = %d\n", fd); if (fcntl (fd, F_GETFD) == 0 || errno != EBADF) { puts ("file descriptor valid in child"); return 1; } return 0; } struct sockaddr_un sun; strcpy (sun.sun_path, "./testsocket"); sun.sun_family = AF_UNIX; char databuf[] = "hello"; struct iovec iov[1]; iov[0].iov_base = databuf; iov[0].iov_len = sizeof (databuf); union { struct cmsghdr hdr; char bytes[CMSG_SPACE (sizeof (int))]; } buf; struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1, .msg_control = buf.bytes, .msg_controllen = sizeof (buf) }; struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN (sizeof (int)); msg.msg_controllen = cmsg->cmsg_len; pid_t child = fork (); if (child == -1) error (1, errno, "fork"); if (child == 0) { int sock = socket (PF_UNIX, SOCK_STREAM, 0); if (sock < 0) error (1, errno, "socket"); if (bind (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0) error (1, errno, "bind"); if (listen (sock, SOMAXCONN) < 0) error (1, errno, "listen"); int conn = accept (sock, NULL, NULL); if (conn == -1) error (1, errno, "accept"); *(int *) CMSG_DATA (cmsg) = sock; if (sendmsg (conn, &msg, MSG_NOSIGNAL) < 0) error (1, errno, "sendmsg"); return 0; } /* For a test suite this should be more robust like a barrier in shared memory. */ sleep (1); int sock = socket (PF_UNIX, SOCK_STREAM, 0); if (sock < 0) error (1, errno, "socket"); if (connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0) error (1, errno, "connect"); unlink (sun.sun_path); *(int *) CMSG_DATA (cmsg) = -1; if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0) error (1, errno, "recvmsg"); int fd = *(int *) CMSG_DATA (cmsg); if (fd == -1) error (1, 0, "no descriptor received"); char fdname[20]; snprintf (fdname, sizeof (fdname), "%d", fd); execl ("/proc/self/exe", argv[0], fdname, NULL); puts ("execl failed"); return 1; } [akpm@linux-foundation.org: Fix fastcall inconsistency noted by Michael Buesch] [akpm@linux-foundation.org: build fix] Signed-off-by: Ulrich Drepper Cc: Ingo Molnar Cc: Michael Buesch Cc: Michael Kerrisk Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/file.h | 1 + include/linux/socket.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index a59001e9ea58..0114fbc78061 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -73,6 +73,7 @@ extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed)); extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag)); extern void put_filp(struct file *); extern int get_unused_fd(void); +extern int get_unused_fd_flags(int flags); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; diff --git a/include/linux/socket.h b/include/linux/socket.h index fe195c97a89d..f852e1afd65a 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -253,6 +253,9 @@ struct ucred { #define MSG_EOF MSG_FIN +#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file + descriptor received through + SCM_RIGHTS */ #if defined(CONFIG_COMPAT) #define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */ #else -- cgit v1.2.3 From aa0ac36518be648dda3a32f0b37a8b2b546e1b24 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 15 Jul 2007 23:40:39 -0700 Subject: Remove capability.h from mm.h I forgot to remove capability.h from mm.h while removing sched.h! This patch remedies that, because the only inline function which was using CAP_something was made out of line. Cross-compile tested without regressions on: all powerpc defconfigs all mips defconfigs all m68k defconfigs all arm defconfigs all ia64 defconfigs alpha alpha-allnoconfig alpha-defconfig alpha-up arm i386 i386-allnoconfig i386-defconfig i386-up ia64 ia64-allnoconfig ia64-defconfig ia64-up m68k mips parisc parisc-allnoconfig parisc-defconfig parisc-up powerpc powerpc-up s390 s390-allnoconfig s390-defconfig s390-up sparc sparc-allnoconfig sparc-defconfig sparc-up sparc64 sparc64-allnoconfig sparc64-defconfig sparc64-up um-x86_64 x86_64 x86_64-allnoconfig x86_64-defconfig x86_64-up as well as my two usual configs. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d4ab4e590e23..97d0cddfd223 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2,7 +2,6 @@ #define _LINUX_MM_H #include -#include #ifdef __KERNEL__ -- cgit v1.2.3 From 759448f459234bfcf34b82471f0dba77a9aca498 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 15 Jul 2007 23:40:40 -0700 Subject: Kernel utf-8 handling This patch fixes dead keys and copy/paste of non-ASCII characters in UTF-8 mode on Linux console. See more details about the original patch at: http://chris.heathens.co.nz/linux/utf8.html Already posted on (Oldest) http://lkml.org/lkml/2003/5/31/148 http://lkml.org/lkml/2005/12/24/69 (Recent) http://lkml.org/lkml/2006/8/7/75 [bunk@stusta.de: make drivers/char/selection.c:store_utf8() static] Signed-off-by: Jan Engelhardt Cc: Alexander E. Patrakov Cc: Dmitry Torokhov Cc: "Antonino A. Daplas" Signed-off-by: Adrian Bunk Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/consolemap.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index 82c9a1f11020..06b2768c603f 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -8,9 +8,12 @@ #define IBMPC_MAP 2 #define USER_MAP 3 +#include + struct vc_data; -extern unsigned char inverse_translate(struct vc_data *conp, int glyph); +extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode); extern unsigned short *set_translate(int m, struct vc_data *vc); extern int conv_uni_to_pc(struct vc_data *conp, long ucs); +extern u32 conv_8bit_to_uni(unsigned char c); void console_map_init(void); -- cgit v1.2.3 From c289dca37917338fc8ab2e0d7e202a1c927e229e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 15 Jul 2007 23:40:42 -0700 Subject: remove sonypi_camera_command() Remove the no longer used sonypi_camera_command(). Signed-off-by: Adrian Bunk Acked-by: Mattia Dongili Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sonypi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 34d4b075f7b8..40c7b5d993b9 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -153,8 +153,6 @@ #define SONYPI_COMMAND_GETCAMERAROMVERSION 18 /* obsolete */ #define SONYPI_COMMAND_GETCAMERAREVISION 19 /* obsolete */ -int sonypi_camera_command(int command, u8 value); - #endif /* __KERNEL__ */ #endif /* _SONYPI_H_ */ -- cgit v1.2.3 From dcae56ea661e13d8f904b584bbe4c1e50c7ee548 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:40:45 -0700 Subject: Drop an empty isicom.h from being exported to user space. Drop from being exported to user space since it would be only an empty file. Signed-off-by: Robert P. J. Day Acked-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 127d2d192b5a..bcf875e844fe 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -247,7 +247,6 @@ unifdef-y += isdn.h unifdef-y += isdnif.h unifdef-y += isdn_divertif.h unifdef-y += isdn_ppp.h -unifdef-y += isicom.h unifdef-y += jbd.h unifdef-y += joystick.h unifdef-y += kdev_t.h -- cgit v1.2.3 From b663a79c191508f27cd885224b592a878c0ba0f6 Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Sun, 15 Jul 2007 23:40:48 -0700 Subject: taskstats: add context-switch counters Make available to the user the following task and process performance statistics: * Involuntary Context Switches (task_struct->nivcsw) * Voluntary Context Switches (task_struct->nvcsw) Statistics information is available from: 1. taskstats interface (Documentation/accounting/) 2. /proc/PID/status (task only). This data is useful for detecting hyperactivity patterns between processes. [akpm@linux-foundation.org: cleanup] Signed-off-by: Maxim Uvarov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Cc: Jonathan Lim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index a46104a28f66..dce1ed204972 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -31,7 +31,7 @@ */ -#define TASKSTATS_VERSION 4 +#define TASKSTATS_VERSION 5 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -149,6 +149,9 @@ struct taskstats { __u64 read_bytes; /* bytes of read I/O */ __u64 write_bytes; /* bytes of write I/O */ __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ + + __u64 nvcsw; /* voluntary_ctxt_switches */ + __u64 nivcsw; /* nonvoluntary_ctxt_switches */ }; -- cgit v1.2.3 From 5216184571946b8bbf06f0cd630c7754190fdd1a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 15 Jul 2007 23:40:51 -0700 Subject: fix typo in prefetch.h Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prefetch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index fc86f274147f..1adfe668d031 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -27,7 +27,7 @@ prefetch(x) - prefetches the cacheline at "x" for read prefetchw(x) - prefetches the cacheline at "x" for write - spin_lock_prefetch(x) - prefectches the spinlock *x for taking + spin_lock_prefetch(x) - prefetches the spinlock *x for taking there is also PREFETCH_STRIDE which is the architecure-prefered "lookahead" size for prefetching streamed operations. -- cgit v1.2.3 From 4f27c00bf80f122513d3a5be16ed851573164534 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sun, 15 Jul 2007 23:40:55 -0700 Subject: Improve behaviour of spurious IRQ detect Currently we handle spurious IRQ activity based upon seeing a lot of invalid interrupts, and we clear things back on the base of lots of valid interrupts. Unfortunately in some cases you get legitimate invalid interrupts caused by timing asynchronicity between the PCI bus and the APIC bus when disabling interrupts and pulling other tricks. In this case although the spurious IRQs are not a problem our unhandled counters didn't clear and they act as a slow running timebomb. (This is effectively what the serial port/tty problem that was fixed by clearing counters when registering a handler showed up) It's easy enough to add a second parameter - time. This means that if we see a regular stream of harmless spurious interrupts which are not harming processing we don't go off and do something stupid like disable the IRQ after a month of running. OTOH lockups and performance killers show up a lot more than 10/second [akpm@linux-foundation.org: cleanup] Signed-off-by: Alan Cox Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 1695054e8c63..44657197fcb0 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -161,6 +161,7 @@ struct irq_desc { unsigned int wake_depth; /* nested wake enables */ unsigned int irq_count; /* For detecting broken IRQs */ unsigned int irqs_unhandled; + unsigned long last_unhandled; /* Aging timer for unhandled count */ spinlock_t lock; #ifdef CONFIG_SMP cpumask_t affinity; -- cgit v1.2.3 From 522ed7767e800cff6c650ec64b0ee0677303119c Mon Sep 17 00:00:00 2001 From: Miloslav Trmac Date: Sun, 15 Jul 2007 23:40:56 -0700 Subject: Audit: add TTY input auditing Add TTY input auditing, used to audit system administrator's actions. This is required by various security standards such as DCID 6/3 and PCI to provide non-repudiation of administrator's actions and to allow a review of past actions if the administrator seems to overstep their duties or if the system becomes misconfigured for unknown reasons. These requirements do not make it necessary to audit TTY output as well. Compared to an user-space keylogger, this approach records TTY input using the audit subsystem, correlated with other audit events, and it is completely transparent to the user-space application (e.g. the console ioctls still work). TTY input auditing works on a higher level than auditing all system calls within the session, which would produce an overwhelming amount of mostly useless audit events. Add an "audit_tty" attribute, inherited across fork (). Data read from TTYs by process with the attribute is sent to the audit subsystem by the kernel. The audit netlink interface is extended to allow modifying the audit_tty attribute, and to allow sending explanatory audit events from user-space (for example, a shell might send an event containing the final command, after the interactive command-line editing and history expansion is performed, which might be difficult to decipher from the TTY input alone). Because the "audit_tty" attribute is inherited across fork (), it would be set e.g. for sshd restarted within an audited session. To prevent this, the audit_tty attribute is cleared when a process with no open TTY file descriptors (e.g. after daemon startup) opens a TTY. See https://www.redhat.com/archives/linux-audit/2007-June/msg00000.html for a more detailed rationale document for an older version of this patch. [akpm@linux-foundation.org: build fix] Signed-off-by: Miloslav Trmac Cc: Al Viro Cc: Alan Cox Cc: Paul Fulghum Cc: Casey Schaufler Cc: Steve Grubb Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 11 +++++++++++ include/linux/sched.h | 4 ++++ include/linux/tty.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index fccc6e50298a..8ca7ca0b47f0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -63,9 +63,12 @@ #define AUDIT_ADD_RULE 1011 /* Add syscall filtering rule */ #define AUDIT_DEL_RULE 1012 /* Delete syscall filtering rule */ #define AUDIT_LIST_RULES 1013 /* List syscall filtering rules */ +#define AUDIT_TTY_GET 1014 /* Get TTY auditing status */ +#define AUDIT_TTY_SET 1015 /* Set TTY auditing status */ #define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ #define AUDIT_USER_AVC 1107 /* We filter this differently */ +#define AUDIT_USER_TTY 1124 /* Non-ICANON TTY input meaning */ #define AUDIT_LAST_USER_MSG 1199 #define AUDIT_FIRST_USER_MSG2 2100 /* More user space messages */ #define AUDIT_LAST_USER_MSG2 2999 @@ -92,6 +95,7 @@ #define AUDIT_KERNEL_OTHER 1316 /* For use by 3rd party modules */ #define AUDIT_FD_PAIR 1317 /* audit record for pipe/socketpair */ #define AUDIT_OBJ_PID 1318 /* ptrace target */ +#define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -289,6 +293,10 @@ struct audit_status { __u32 backlog; /* messages waiting in queue */ }; +struct audit_tty_status { + __u32 enabled; /* 1 = enabled, 0 = disabled */ +}; + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. @@ -515,11 +523,13 @@ extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct dentry *dentry, struct vfsmount *vfsmnt); +extern void audit_log_lost(const char *message); /* Private API (for audit.c only) */ extern int audit_filter_user(struct netlink_skb_parms *cb, int type); extern int audit_filter_type(int type); extern int audit_receive_filter(int type, int pid, int uid, int seq, void *data, size_t datasz, uid_t loginuid, u32 sid); +extern int audit_enabled; #else #define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) @@ -530,6 +540,7 @@ extern int audit_receive_filter(int type, int pid, int uid, int seq, #define audit_log_untrustedstring(a,s) do { ; } while (0) #define audit_log_n_untrustedstring(a,n,s) do { ; } while (0) #define audit_log_d_path(b,p,d,v) do { ; } while (0) +#define audit_enabled 0 #endif #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3cffc1204663..b579624477f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -529,6 +529,10 @@ struct signal_struct { #ifdef CONFIG_TASKSTATS struct taskstats *stats; #endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif }; /* Context switch must be unlocked if interrupts are to be enabled */ diff --git a/include/linux/tty.h b/include/linux/tty.h index deaba9ec5004..691a1748d9d2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -178,6 +178,7 @@ struct tty_bufhead { #define L_IEXTEN(tty) _L_FLAG((tty),IEXTEN) struct device; +struct signal_struct; /* * Where all of the state associated with a tty is kept while the tty * is open. Since the termios state should be kept even if the tty @@ -310,6 +311,7 @@ extern void tty_hangup(struct tty_struct * tty); extern void tty_vhangup(struct tty_struct * tty); extern void tty_unhangup(struct file *filp); extern int tty_hung_up_p(struct file * filp); +extern int is_tty(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); extern void disassociate_ctty(int priv); @@ -347,6 +349,37 @@ extern int tty_write_lock(struct tty_struct *tty, int ndelay); /* n_tty.c */ extern struct tty_ldisc tty_ldisc_N_TTY; +/* tty_audit.c */ +#ifdef CONFIG_AUDIT +extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, + size_t size); +extern void tty_audit_exit(void); +extern void tty_audit_fork(struct signal_struct *sig); +extern void tty_audit_push(struct tty_struct *tty); +extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid); +extern void tty_audit_opening(void); +#else +static inline void tty_audit_add_data(struct tty_struct *tty, + unsigned char *data, size_t size) +{ +} +static inline void tty_audit_exit(void) +{ +} +static inline void tty_audit_fork(struct signal_struct *sig) +{ +} +static inline void tty_audit_push(struct tty_struct *tty) +{ +} +static inline void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid) +{ +} +static inline void tty_audit_opening(void) +{ +} +#endif + /* tty_ioctl.c */ extern int n_tty_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg); -- cgit v1.2.3 From 7d69a1f4a72b18876c99c697692b78339d491568 Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Sun, 15 Jul 2007 23:40:58 -0700 Subject: remove CONFIG_UTS_NS and CONFIG_IPC_NS CONFIG_UTS_NS and CONFIG_IPC_NS have very little value as they only deactivate the unshare of the uts and ipc namespaces and do not improve performance. Signed-off-by: Cedric Le Goater Acked-by: "Serge E. Hallyn" Cc: Eric W. Biederman Cc: Herbert Poetzl Cc: Pavel Emelianov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 11 +++-------- include/linux/utsname.h | 13 ------------- 2 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 7c8c6d8d090c..3fd3ddd5f0d9 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -93,6 +93,7 @@ extern struct ipc_namespace init_ipc_ns; #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, +extern void free_ipc_ns(struct kref *kref); extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns); #else @@ -104,13 +105,9 @@ static inline struct ipc_namespace *copy_ipcs(unsigned long flags, } #endif -#ifdef CONFIG_IPC_NS -extern void free_ipc_ns(struct kref *kref); -#endif - static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { -#ifdef CONFIG_IPC_NS +#ifdef CONFIG_SYSVIPC if (ns) kref_get(&ns->kref); #endif @@ -119,7 +116,7 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) static inline void put_ipc_ns(struct ipc_namespace *ns) { -#ifdef CONFIG_IPC_NS +#ifdef CONFIG_SYSVIPC kref_put(&ns->kref, free_ipc_ns); #endif } @@ -127,5 +124,3 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) #endif /* __KERNEL__ */ #endif /* _LINUX_IPC_H */ - - diff --git a/include/linux/utsname.h b/include/linux/utsname.h index f8d3b326e93a..51ad167611e4 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -48,7 +48,6 @@ static inline void get_uts_ns(struct uts_namespace *ns) kref_get(&ns->kref); } -#ifdef CONFIG_UTS_NS extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns); extern void free_uts_ns(struct kref *kref); @@ -56,18 +55,6 @@ static inline void put_uts_ns(struct uts_namespace *ns) { kref_put(&ns->kref, free_uts_ns); } -#else -static inline struct uts_namespace *copy_utsname(int flags, - struct uts_namespace *ns) -{ - return ns; -} - -static inline void put_uts_ns(struct uts_namespace *ns) -{ -} -#endif - static inline struct new_utsname *utsname(void) { return ¤t->nsproxy->uts_ns->name; -- cgit v1.2.3 From acce292c82d4d82d35553b928df2b0597c3a9c78 Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Sun, 15 Jul 2007 23:40:59 -0700 Subject: user namespace: add the framework Basically, it will allow a process to unshare its user_struct table, resetting at the same time its own user_struct and all the associated accounting. A new root user (uid == 0) is added to the user namespace upon creation. Such root users have full privileges and it seems that theses privileges should be controlled through some means (process capabilities ?) The unshare is not included in this patch. Changes since [try #4]: - Updated get_user_ns and put_user_ns to accept NULL, and get_user_ns to return the namespace. Changes since [try #3]: - moved struct user_namespace to files user_namespace.{c,h} Changes since [try #2]: - removed struct user_namespace* argument from find_user() Changes since [try #1]: - removed struct user_namespace* argument from find_user() - added a root_user per user namespace Signed-off-by: Cedric Le Goater Signed-off-by: Serge E. Hallyn Acked-by: Pavel Emelianov Cc: Herbert Poetzl Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Chris Wright Cc: Stephen Smalley Cc: James Morris Cc: Andrew Morgan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 2 ++ include/linux/nsproxy.h | 1 + include/linux/sched.h | 3 ++- include/linux/user_namespace.h | 57 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 include/linux/user_namespace.h (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 276ccaa2670c..cab741c2d603 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -8,6 +8,7 @@ #include #include #include +#include #define INIT_FDTABLE \ { \ @@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy; .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ INIT_IPC_NS(ipc_ns) \ + .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 189e0dc993ab..6d179a397bfb 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -28,6 +28,7 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; + struct user_namespace *user_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index b579624477f4..c667255d70db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -287,6 +287,7 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); struct nsproxy; +struct user_namespace; /* Maximum number of active map areas.. This is a random (large) number */ #define DEFAULT_MAX_MAP_COUNT 65536 @@ -1408,7 +1409,7 @@ extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. */ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(struct user_namespace *, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h new file mode 100644 index 000000000000..92a45867ecfb --- /dev/null +++ b/include/linux/user_namespace.h @@ -0,0 +1,57 @@ +#ifndef _LINUX_USER_NAMESPACE_H +#define _LINUX_USER_NAMESPACE_H + +#include +#include +#include + +#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) +#define UIDHASH_SZ (1 << UIDHASH_BITS) + +struct user_namespace { + struct kref kref; + struct list_head uidhash_table[UIDHASH_SZ]; + struct user_struct *root_user; +}; + +extern struct user_namespace init_user_ns; + +#ifdef CONFIG_USER_NS + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_get(&ns->kref); + return ns; +} + +extern struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns); +extern void free_user_ns(struct kref *kref); + +static inline void put_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_put(&ns->kref, free_user_ns); +} + +#else + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + return &init_user_ns; +} + +static inline struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns) +{ + return NULL; +} + +static inline void put_user_ns(struct user_namespace *ns) +{ +} + +#endif + +#endif /* _LINUX_USER_H */ -- cgit v1.2.3 From 77ec739d8d0979477fc91f530403805afa2581a4 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Sun, 15 Jul 2007 23:41:01 -0700 Subject: user namespace: add unshare This patch enables the unshare of user namespaces. It adds a new clone flag CLONE_NEWUSER and implements copy_user_ns() which resets the current user_struct and adds a new root user (uid == 0) For now, unsharing the user namespace allows a process to reset its user_struct accounting and uid 0 in the new user namespace should be contained using appropriate means, for instance selinux The plan, when the full support is complete (all uid checks covered), is to keep the original user's rights in the original namespace, and let a process become uid 0 in the new namespace, with full capabilities to the new namespace. Signed-off-by: Serge E. Hallyn Signed-off-by: Cedric Le Goater Acked-by: Pavel Emelianov Cc: Herbert Poetzl Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Chris Wright Cc: Stephen Smalley Cc: James Morris Cc: Andrew Morgan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + include/linux/user_namespace.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c667255d70db..731edaca8ffd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,6 +26,7 @@ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ +#define CLONE_NEWUSER 0x10000000 /* New user namespace */ /* * Scheduling policies diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 92a45867ecfb..bb320573bb9e 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -4,6 +4,7 @@ #include #include #include +#include #define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) #define UIDHASH_SZ (1 << UIDHASH_BITS) @@ -45,6 +46,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) static inline struct user_namespace *copy_user_ns(int flags, struct user_namespace *old_ns) { + if (flags & CLONE_NEWUSER) + return ERR_PTR(-EINVAL); + return NULL; } -- cgit v1.2.3 From ea5a3dcfda1c9140228f2842ea9b01e1713c559a Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:41:04 -0700 Subject: COBALT: remove all references to Cobalt NVRAM Remove not only the references to Cobalt NVRAM, but the header file as well. Signed-off-by: Robert P. J. Day Acked-by: Tim Hockin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cobalt-nvram.h | 109 ------------------------------------------- 1 file changed, 109 deletions(-) delete mode 100644 include/linux/cobalt-nvram.h (limited to 'include/linux') diff --git a/include/linux/cobalt-nvram.h b/include/linux/cobalt-nvram.h deleted file mode 100644 index ea429562ff36..000000000000 --- a/include/linux/cobalt-nvram.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * $Id: cobalt-nvram.h,v 1.20 2001/10/17 23:16:55 thockin Exp $ - * cobalt-nvram.h : defines for the various fields in the cobalt NVRAM - * - * Copyright 2001,2002 Sun Microsystems, Inc. - */ - -#ifndef COBALT_NVRAM_H -#define COBALT_NVRAM_H - -#include - -#define COBT_CMOS_INFO_MAX 0x7f /* top address allowed */ -#define COBT_CMOS_BIOS_DRIVE_INFO 0x12 /* drive info would go here */ - -#define COBT_CMOS_CKS_START NVRAM_OFFSET(0x0e) -#define COBT_CMOS_CKS_END NVRAM_OFFSET(0x7f) - -/* flag bytes - 16 flags for now, leave room for more */ -#define COBT_CMOS_FLAG_BYTE_0 NVRAM_OFFSET(0x10) -#define COBT_CMOS_FLAG_BYTE_1 NVRAM_OFFSET(0x11) - -/* flags in flag bytes - up to 16 */ -#define COBT_CMOS_FLAG_MIN 0x0001 -#define COBT_CMOS_CONSOLE_FLAG 0x0001 /* console on/off */ -#define COBT_CMOS_DEBUG_FLAG 0x0002 /* ROM debug messages */ -#define COBT_CMOS_AUTO_PROMPT_FLAG 0x0004 /* boot to ROM prompt? */ -#define COBT_CMOS_CLEAN_BOOT_FLAG 0x0008 /* set by a clean shutdown */ -#define COBT_CMOS_HW_NOPROBE_FLAG 0x0010 /* go easy on the probing */ -#define COBT_CMOS_SYSFAULT_FLAG 0x0020 /* system fault detected */ -#define COBT_CMOS_OOPSPANIC_FLAG 0x0040 /* panic on oops */ -#define COBT_CMOS_DELAY_CACHE_FLAG 0x0080 /* delay cache initialization */ -#define COBT_CMOS_NOLOGO_FLAG 0x0100 /* hide "C" logo @ boot */ -#define COBT_CMOS_VERSION_FLAG 0x0200 /* the version field is valid */ -#define COBT_CMOS_FLAG_MAX 0x0200 - -/* leave byte 0x12 blank - Linux looks for drive info here */ - -/* CMOS structure version, valid if COBT_CMOS_VERSION_FLAG is true */ -#define COBT_CMOS_VERSION NVRAM_OFFSET(0x13) -#define COBT_CMOS_VER_BTOCODE 1 /* min. version needed for btocode */ - -/* index of default boot method */ -#define COBT_CMOS_BOOT_METHOD NVRAM_OFFSET(0x20) -#define COBT_CMOS_BOOT_METHOD_DISK 0 -#define COBT_CMOS_BOOT_METHOD_ROM 1 -#define COBT_CMOS_BOOT_METHOD_NET 2 - -#define COBT_CMOS_BOOT_DEV_MIN NVRAM_OFFSET(0x21) -/* major #, minor # of first through fourth boot device */ -#define COBT_CMOS_BOOT_DEV0_MAJ NVRAM_OFFSET(0x21) -#define COBT_CMOS_BOOT_DEV0_MIN NVRAM_OFFSET(0x22) -#define COBT_CMOS_BOOT_DEV1_MAJ NVRAM_OFFSET(0x23) -#define COBT_CMOS_BOOT_DEV1_MIN NVRAM_OFFSET(0x24) -#define COBT_CMOS_BOOT_DEV2_MAJ NVRAM_OFFSET(0x25) -#define COBT_CMOS_BOOT_DEV2_MIN NVRAM_OFFSET(0x26) -#define COBT_CMOS_BOOT_DEV3_MAJ NVRAM_OFFSET(0x27) -#define COBT_CMOS_BOOT_DEV3_MIN NVRAM_OFFSET(0x28) -#define COBT_CMOS_BOOT_DEV_MAX NVRAM_OFFSET(0x28) - -/* checksum of bytes 0xe-0x7f */ -#define COBT_CMOS_CHECKSUM NVRAM_OFFSET(0x2e) - -/* running uptime counter, units of 5 minutes (32 bits =~ 41000 years) */ -#define COBT_CMOS_UPTIME_0 NVRAM_OFFSET(0x30) -#define COBT_CMOS_UPTIME_1 NVRAM_OFFSET(0x31) -#define COBT_CMOS_UPTIME_2 NVRAM_OFFSET(0x32) -#define COBT_CMOS_UPTIME_3 NVRAM_OFFSET(0x33) - -/* count of successful boots (32 bits) */ -#define COBT_CMOS_BOOTCOUNT_0 NVRAM_OFFSET(0x38) -#define COBT_CMOS_BOOTCOUNT_1 NVRAM_OFFSET(0x39) -#define COBT_CMOS_BOOTCOUNT_2 NVRAM_OFFSET(0x3a) -#define COBT_CMOS_BOOTCOUNT_3 NVRAM_OFFSET(0x3b) - -/* 13 bytes: system serial number, same as on the back of the system */ -#define COBT_CMOS_SYS_SERNUM_LEN 13 -#define COBT_CMOS_SYS_SERNUM_0 NVRAM_OFFSET(0x40) -#define COBT_CMOS_SYS_SERNUM_1 NVRAM_OFFSET(0x41) -#define COBT_CMOS_SYS_SERNUM_2 NVRAM_OFFSET(0x42) -#define COBT_CMOS_SYS_SERNUM_3 NVRAM_OFFSET(0x43) -#define COBT_CMOS_SYS_SERNUM_4 NVRAM_OFFSET(0x44) -#define COBT_CMOS_SYS_SERNUM_5 NVRAM_OFFSET(0x45) -#define COBT_CMOS_SYS_SERNUM_6 NVRAM_OFFSET(0x46) -#define COBT_CMOS_SYS_SERNUM_7 NVRAM_OFFSET(0x47) -#define COBT_CMOS_SYS_SERNUM_8 NVRAM_OFFSET(0x48) -#define COBT_CMOS_SYS_SERNUM_9 NVRAM_OFFSET(0x49) -#define COBT_CMOS_SYS_SERNUM_10 NVRAM_OFFSET(0x4a) -#define COBT_CMOS_SYS_SERNUM_11 NVRAM_OFFSET(0x4b) -#define COBT_CMOS_SYS_SERNUM_12 NVRAM_OFFSET(0x4c) -/* checksum for serial num - 1 byte */ -#define COBT_CMOS_SYS_SERNUM_CSUM NVRAM_OFFSET(0x4f) - -#define COBT_CMOS_ROM_REV_MAJ NVRAM_OFFSET(0x50) -#define COBT_CMOS_ROM_REV_MIN NVRAM_OFFSET(0x51) -#define COBT_CMOS_ROM_REV_REV NVRAM_OFFSET(0x52) - -#define COBT_CMOS_BTO_CODE_0 NVRAM_OFFSET(0x53) -#define COBT_CMOS_BTO_CODE_1 NVRAM_OFFSET(0x54) -#define COBT_CMOS_BTO_CODE_2 NVRAM_OFFSET(0x55) -#define COBT_CMOS_BTO_CODE_3 NVRAM_OFFSET(0x56) - -#define COBT_CMOS_BTO_IP_CSUM NVRAM_OFFSET(0x57) -#define COBT_CMOS_BTO_IP_0 NVRAM_OFFSET(0x58) -#define COBT_CMOS_BTO_IP_1 NVRAM_OFFSET(0x59) -#define COBT_CMOS_BTO_IP_2 NVRAM_OFFSET(0x5a) -#define COBT_CMOS_BTO_IP_3 NVRAM_OFFSET(0x5b) - -#endif /* COBALT_NVRAM_H */ -- cgit v1.2.3 From dcf5008db171211e3c34c060cacfd788306b034b Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Sun, 15 Jul 2007 23:41:09 -0700 Subject: remove unused lock_cpu_hotplug_interruptible definition aa95387774039096c11803c04011f1aa42d85758 removed the implementation of lock_cpu_hotplug_interruptible and all users of it. This stub definition for !CONFIG_HOTPLUG_CPU was left over -- kill it now. Signed-off-by: Nathan Lynch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3b2df2523f1d..c2236bbff412 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,7 +120,6 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) #define lock_cpu_hotplug() do { } while (0) #define unlock_cpu_hotplug() do { } while (0) -#define lock_cpu_hotplug_interruptible() 0 #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) #define register_hotcpu_notifier(nb) do { (void)(nb); } while (0) #define unregister_hotcpu_notifier(nb) do { (void)(nb); } while (0) -- cgit v1.2.3 From f4895925976977aaeda26ee2a603a99f17db500b Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:41:13 -0700 Subject: Remove final two references to "__obsolete_setup" macro Signed-off-by: Robert P. J. Day Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 56ec4c62eee0..5b5285316339 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -171,9 +171,6 @@ struct obs_kernel_param { #define __setup(str, fn) \ __setup_param(str, fn, fn, 0) -#define __obsolete_setup(str) \ - __setup_null_param(str, __LINE__) - /* NOTE: fn is as per module_param, not __setup! Emits warning if fn * returns non-zero. */ #define early_param(str, fn) \ @@ -239,7 +236,6 @@ void __init parse_early_param(void); #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup_null_param(str, unique_id) /* nothing */ #define __setup(str, func) /* nothing */ -#define __obsolete_setup(str) /* nothing */ #endif /* Data marked not to be saved by software suspend */ -- cgit v1.2.3 From 213dd266d48af90c1eec8688c1ff31aa34d21de2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 15 Jul 2007 23:41:15 -0700 Subject: namespace: ensure clone_flags are always stored in an unsigned long While working on unshare support for the network namespace I noticed we were putting clone flags in an int. Which is weird because the syscall uses unsigned long and we at least need an unsigned to properly hold all of the unshare flags. So to make the code consistent, this patch updates the code to use unsigned long instead of int for the clone flags in those places where we get it wrong today. Signed-off-by: Eric W. Biederman Acked-by: Cedric Le Goater Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mnt_namespace.h | 2 +- include/linux/nsproxy.h | 2 +- include/linux/pid_namespace.h | 2 +- include/linux/utsname.h | 3 ++- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 1fa4d9813b31..8eed44f8ca73 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -14,7 +14,7 @@ struct mnt_namespace { int event; }; -extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *, +extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 6d179a397bfb..ce06188b7a56 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -32,7 +32,7 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; -int copy_namespaces(int flags, struct task_struct *tsk); +int copy_namespaces(unsigned long flags, struct task_struct *tsk); void get_task_namespaces(struct task_struct *tsk); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 169c6c24209b..b9a17e08ff0f 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -29,7 +29,7 @@ static inline void get_pid_ns(struct pid_namespace *ns) kref_get(&ns->kref); } -extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns); +extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); extern void free_pid_ns(struct kref *kref); static inline void put_pid_ns(struct pid_namespace *ns) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 51ad167611e4..923db99175f2 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -48,7 +48,8 @@ static inline void get_uts_ns(struct uts_namespace *ns) kref_get(&ns->kref); } -extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns); +extern struct uts_namespace *copy_utsname(unsigned long flags, + struct uts_namespace *ns); extern void free_uts_ns(struct kref *kref); static inline void put_uts_ns(struct uts_namespace *ns) -- cgit v1.2.3 From 132e4b0a049c39337c535501561b8301c7f2b202 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sun, 15 Jul 2007 23:41:19 -0700 Subject: cdrom: replace hard-coded constants by kernel.h macro. Signed-off-by: Robert P. J. Day Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cdrom.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index f50f04bdbc16..2b641b176e7f 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -414,8 +414,8 @@ struct cdrom_generic_command #define CDO_CHECK_TYPE 0x10 /* check type on open for data */ /* Special codes used when specifying changer slots. */ -#define CDSL_NONE ((int) (~0U>>1)-1) -#define CDSL_CURRENT ((int) (~0U>>1)) +#define CDSL_NONE (INT_MAX-1) +#define CDSL_CURRENT INT_MAX /* For partition based multisession access. IDE can handle 64 partitions * per drive - SCSI CD-ROM's use minors to differentiate between the -- cgit v1.2.3 From 1d9d02feeee89e9132034d504c9a45eeaf618a3d Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Sun, 15 Jul 2007 23:41:32 -0700 Subject: move seccomp from /proc to a prctl This reduces the memory footprint and it enforces that only the current task can enable seccomp on itself (this is a requirement for a strightforward [modulo preempt ;) ] TIF_NOTSC implementation). Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 4 ++++ include/linux/seccomp.h | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 52a9be41250d..e2eff9079fe9 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -59,4 +59,8 @@ # define PR_ENDIAN_LITTLE 1 /* True little endian mode */ # define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 3e8b1cf54303..d708974dbfe3 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -4,8 +4,6 @@ #ifdef CONFIG_SECCOMP -#define NR_SECCOMP_MODES 1 - #include #include @@ -23,6 +21,9 @@ static inline int has_secure_computing(struct thread_info *ti) return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); } +extern long prctl_get_seccomp(void); +extern long prctl_set_seccomp(unsigned long); + #else /* CONFIG_SECCOMP */ typedef struct { } seccomp_t; @@ -34,6 +35,16 @@ static inline int has_secure_computing(struct thread_info *ti) return 0; } +static inline long prctl_get_seccomp(void) +{ + return -EINVAL; +} + +static inline long prctl_set_seccomp(unsigned long arg2) +{ + return -EINVAL; +} + #endif /* CONFIG_SECCOMP */ #endif /* _LINUX_SECCOMP_H */ -- cgit v1.2.3 From cf99abace7e07dd8491e7093a9a9ef11d48838ed Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Sun, 15 Jul 2007 23:41:33 -0700 Subject: make seccomp zerocost in schedule This follows a suggestion from Chuck Ebbert on how to make seccomp absolutely zerocost in schedule too. The only remaining footprint of seccomp is in terms of the bzImage size that becomes a few bytes (perhaps even a few kbytes) larger, measure it if you care in the embedded. Signed-off-by: Andrea Arcangeli Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/seccomp.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index d708974dbfe3..262a8dccfa81 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -16,11 +16,6 @@ static inline void secure_computing(int this_syscall) __secure_computing(this_syscall); } -static inline int has_secure_computing(struct thread_info *ti) -{ - return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); -} - extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long); @@ -29,11 +24,6 @@ extern long prctl_set_seccomp(unsigned long); typedef struct { } seccomp_t; #define secure_computing(x) do { } while (0) -/* static inline to preserve typechecking */ -static inline int has_secure_computing(struct thread_info *ti) -{ - return 0; -} static inline long prctl_get_seccomp(void) { -- cgit v1.2.3 From cc2ea416b2aa04d0c34ff2281a23dae5b76b7b3b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 15 Jul 2007 23:41:38 -0700 Subject: uninline check_signature() This is a rather bizarre thing to have inlined in io.h. Stick it in lib/ instead. While we're there, despaghetti it a bit, and fix its off-by-one behaviour when passed a zero length. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 8423dd376514..e3b2dda6c8eb 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -63,32 +63,7 @@ void __iomem * devm_ioremap(struct device *dev, unsigned long offset, void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset, unsigned long size); void devm_iounmap(struct device *dev, void __iomem *addr); - -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(const volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} +int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length); #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From 608e2619682e951f525b08e7a48669a3c0263b41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 15 Jul 2007 23:41:39 -0700 Subject: generic bug: use show_regs() instead of dump_stack() The current generic bug implementation has a call to dump_stack() in case a WARN_ON(whatever) gets hit. Since report_bug(), which calls dump_stack(), gets called from an exception handler we can do better: just pass the pt_regs structure to report_bug() and pass it to show_regs() in case of a warning. This will give more debug informations like register contents, etc... In addition this avoids some pointless lines that dump_stack() emits, since it includes a stack backtrace of the exception handler which is of no interest in case of a warning. E.g. on s390 the following lines are currently always present in a stack backtrace if dump_stack() gets called from report_bug(): [<000000000001517a>] show_trace+0x92/0xe8) [<0000000000015270>] show_stack+0xa0/0xd0 [<00000000000152ce>] dump_stack+0x2e/0x3c [<0000000000195450>] report_bug+0x98/0xf8 [<0000000000016cc8>] illegal_op+0x1fc/0x21c [<00000000000227d6>] sysc_return+0x0/0x10 Acked-by: Jeremy Fitzhardinge Acked-by: Haavard Skinnemoen Cc: Andi Kleen Cc: Kyle McMartin Cc: Paul Mackerras Cc: Paul Mundt Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 42aa0a54b6f4..54398d2c6d8d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -10,6 +10,8 @@ enum bug_trap_type { BUG_TRAP_TYPE_BUG = 2, }; +struct pt_regs; + #ifdef CONFIG_GENERIC_BUG #include @@ -20,7 +22,7 @@ static inline int is_warning_bug(const struct bug_entry *bug) const struct bug_entry *find_bug(unsigned long bugaddr); -enum bug_trap_type report_bug(unsigned long bug_addr); +enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); @@ -31,7 +33,8 @@ int is_valid_bugaddr(unsigned long addr); #else /* !CONFIG_GENERIC_BUG */ -static inline enum bug_trap_type report_bug(unsigned long bug_addr) +static inline enum bug_trap_type report_bug(unsigned long bug_addr, + struct pt_regs *regs) { return BUG_TRAP_TYPE_BUG; } -- cgit v1.2.3 From d52988023a37720e9e4aeb66362be67fa21d8836 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 15 Jul 2007 23:41:42 -0700 Subject: Remove the last few UMSDOS leftovers The UMSDOS filesystem was removed back in 2.6.11, but some tiny bits stuck around. This patch removes the few remaining leftovers. The only things left behind after this are the entries in the CREDITS file and the ioctl number in Documentation/ioctl-number.txt as documentation. This third (hopefully final) version of the patch doesn't edit the arch/um/config.release file, since Jeff Dike pointed out to me that it should die completely, and asked me to remove it from my patch as he'll send in a seperate patch removing the file completely. Signed-off-by: Jesper Juhl Acked-by: H. Peter Anvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ncp_fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h index 83e39eb054d3..88766e43e121 100644 --- a/include/linux/ncp_fs.h +++ b/include/linux/ncp_fs.h @@ -148,8 +148,6 @@ struct ncp_nls_ioctl #include #include -/* undef because public define in umsdos_fs.h (ncp_fs.h isn't public) */ -#undef PRINTK /* define because it is easy to change PRINTK to {*}PRINTK */ #define PRINTK(format, args...) printk(KERN_DEBUG format , ## args) -- cgit v1.2.3 From f5a421a4509a7e2dff11da0f01b0548f4f84d503 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 15 Jul 2007 23:41:44 -0700 Subject: rename cancel_rearming_delayed_work() to cancel_delayed_work_sync() Imho, the current naming of cancel_xxx workqueue functions is very confusing. cancel_delayed_work() cancel_rearming_delayed_work() cancel_rearming_delayed_workqueue() // obsolete cancel_work_sync() This looks as if the first 2 functions differ in "type" of their argument which is not true any longer, nowadays the difference is the behaviour. The semantics of cancel_rearming_delayed_work(dwork) was changed significantly, it doesn't require that dwork rearms itself, and cancels dwork synchronously. Rename it to cancel_delayed_work_sync(). This matches cancel_delayed_work() and cancel_work_sync(). Re-create cancel_rearming_delayed_work() as a simple inline obsolete wrapper, like cancel_rearming_delayed_workqueue(). Signed-off-by: Oleg Nesterov Acked-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ce0719a2cfeb..5c89ac6e7f55 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -166,14 +166,21 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } -extern void cancel_rearming_delayed_work(struct delayed_work *work); +extern void cancel_delayed_work_sync(struct delayed_work *work); -/* Obsolete. use cancel_rearming_delayed_work() */ +/* Obsolete. use cancel_delayed_work_sync() */ static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) { - cancel_rearming_delayed_work(work); + cancel_delayed_work_sync(work); +} + +/* Obsolete. use cancel_delayed_work_sync() */ +static inline +void cancel_rearming_delayed_work(struct delayed_work *work) +{ + cancel_delayed_work_sync(work); } #endif -- cgit v1.2.3 From 1f1f642e2f092e37eb9038060eb0100c44f55a11 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 15 Jul 2007 23:41:44 -0700 Subject: make cancel_xxx_work_sync() return a boolean Change cancel_work_sync() and cancel_delayed_work_sync() to return a boolean indicating whether the work was actually cancelled. A zero return value means that the work was not pending/queued. Without that kind of change it is not possible to avoid flush_workqueue() sometimes, see the next patch as an example. Also, this patch unifies both functions and kills the (unlikely) busy-wait loop. Signed-off-by: Oleg Nesterov Acked-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5c89ac6e7f55..ce6badc98f6d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -148,7 +148,7 @@ extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); -extern void cancel_work_sync(struct work_struct *work); +extern int cancel_work_sync(struct work_struct *work); /* * Kill off a pending schedule_delayed_work(). Note that the work callback @@ -166,7 +166,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } -extern void cancel_delayed_work_sync(struct delayed_work *work); +extern int cancel_delayed_work_sync(struct delayed_work *work); /* Obsolete. use cancel_delayed_work_sync() */ static inline -- cgit v1.2.3 From 8f8a68ee486e1c81eaead3c521822bf86142d380 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 15 Jul 2007 23:41:52 -0700 Subject: remove mm/backing-dev.c:congestion_wait_interruptible() congestion_wait_interruptible() is no longer used. Signed-off-by: Adrian Bunk Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f2542c24b328..7011d6255593 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -93,7 +93,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) void clear_bdi_congested(struct backing_dev_info *bdi, int rw); void set_bdi_congested(struct backing_dev_info *bdi, int rw); long congestion_wait(int rw, long timeout); -long congestion_wait_interruptible(int rw, long timeout); void congestion_end(int rw); #define bdi_cap_writeback_dirty(bdi) \ -- cgit v1.2.3 From 2235219b7721b8e74de6841e79240936561a2b63 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 15 Jul 2007 23:41:58 -0700 Subject: ext2: statfs speed up This is a patch that speeds up statfs. It is very simple - the "overhead" calculation, which takes a huge amount of time for large filesystems, never changes unless the size of the filesystem itself changes. That means we can store it in memory and only recalculate if the filesystem has been resized (almost never). It also fixes a minor problem that we never update the on-disk superblock free blocks/inodes counts until the filesystem is unmounted. While not fatal, we may as well update that on disk when we have the information, and it makes things like debugfs and dumpe2fs report a bit more accurate info. Signed-off-by: Badari Pulavarty Signed-off-by: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext2_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h index 4eda0ed76a48..d149f2959e67 100644 --- a/include/linux/ext2_fs_sb.h +++ b/include/linux/ext2_fs_sb.h @@ -33,6 +33,8 @@ struct ext2_sb_info { unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; -- cgit v1.2.3 From a71ce8c6c9bf269b192f352ea555217815cf027e Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 15 Jul 2007 23:41:59 -0700 Subject: ext3: statfs speed up This is a patch that speeds up statfs. It is very simple - the "overhead" calculation, which takes a huge amount of time for large filesystems, never changes unless the size of the filesystem itself changes. That means we can store it in memory and only recalculate if the filesystem has been resized (almost never). It also fixes a minor problem that we never update the on-disk superblock free blocks/inodes counts until the filesystem is unmounted. While not fatal, we may as well update that on disk when we have the information, and it makes things like debugfs and dumpe2fs report a bit more accurate info. Signed-off-by: Badari Pulavarty Signed-off-by: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext3_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h index f61309c81cc4..d3c08353edf6 100644 --- a/include/linux/ext3_fs_sb.h +++ b/include/linux/ext3_fs_sb.h @@ -38,6 +38,8 @@ struct ext3_sb_info { unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; -- cgit v1.2.3 From 5e70030d4cf91613530a23b40ad9919bb9ee114f Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 15 Jul 2007 23:42:00 -0700 Subject: ext4: statfs speed up This is a patch that speeds up statfs. It is very simple - the "overhead" calculation, which takes a huge amount of time for large filesystems, never changes unless the size of the filesystem itself changes. That means we can store it in memory and only recalculate if the filesystem has been resized (almost never). It also fixes a minor problem that we never update the on-disk superblock free blocks/inodes counts until the filesystem is unmounted. While not fatal, we may as well update that on disk when we have the information, and it makes things like debugfs and dumpe2fs report a bit more accurate info. Signed-off-by: Badari Pulavarty Signed-off-by: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ext4_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 691a713139ce..2347557a327a 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -39,6 +39,8 @@ struct ext4_sb_info { unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; -- cgit v1.2.3 From 2e27afb300b56d83bb03fbfa68852b9c1e2920c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Jul 2007 14:31:08 -0700 Subject: Revert "[NET]: Fix races in net_rx_action vs netpoll." This reverts commit 29578624e354f56143d92510fff33a8b2aaa2c03. Ingo Molnar reports complete breakage with his e1000 card (no networking, card reports transmit timeouts), and bisected it down to this commit. Let's figure out what went wrong, but not keep breaking machines until we do. Cc: Ingo Molnar Cc: Olaf Kirch Cc: David Miller Signed-off-by: Linus Torvalds --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 322b5eae57dd..da7a13c97eb8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -262,8 +262,6 @@ enum netdev_state_t __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, - /* Set by the netpoll NAPI code */ - __LINK_STATE_POLL_LIST_FROZEN, }; @@ -1022,14 +1020,6 @@ static inline void netif_rx_complete(struct net_device *dev) { unsigned long flags; -#ifdef CONFIG_NETPOLL - /* Prevent race with netpoll - yes, this is a kludge. - * But at least it doesn't penalize the non-netpoll - * code path. */ - if (test_bit(__LINK_STATE_POLL_LIST_FROZEN, &dev->state)) - return; -#endif - local_irq_save(flags); __netif_rx_complete(dev); local_irq_restore(flags); -- cgit v1.2.3 From 13bd59a111760bb7cba8dcf17b6b55a0d99d3592 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2007 14:18:47 +0200 Subject: Don't define empty struct bsg_class_device if !CONFIG_BLK_DEV_BSG Don't define an empty struct bsg_class_device if !CONFIG_BLK_DEV_BSG. It's embedded in struct request_queue, but there we have #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif anyway. Signed-off-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- include/linux/bsg.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index bd998ca6cb2e..8547b10c388b 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -60,7 +60,6 @@ struct bsg_class_device { extern int bsg_register_queue(struct request_queue *, const char *); extern void bsg_unregister_queue(struct request_queue *); #else -struct bsg_class_device { }; #define bsg_register_queue(disk, name) (0) #define bsg_unregister_queue(disk) do { } while (0) #endif -- cgit v1.2.3 From 769848c03895b63e5662eb7e4ec8c4866f7d0183 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 17 Jul 2007 04:03:05 -0700 Subject: Add __GFP_MOVABLE for callers to flag allocations from high memory that may be migrated It is often known at allocation time whether a page may be migrated or not. This patch adds a flag called __GFP_MOVABLE and a new mask called GFP_HIGH_MOVABLE. Allocations using the __GFP_MOVABLE can be either migrated using the page migration mechanism or reclaimed by syncing with backing storage and discarding. An API function very similar to alloc_zeroed_user_highpage() is added for __GFP_MOVABLE allocations called alloc_zeroed_user_highpage_movable(). The flags used by alloc_zeroed_user_highpage() are not changed because it would change the semantics of an existing API. After this patch is applied there are no in-kernel users of alloc_zeroed_user_highpage() so it probably should be marked deprecated if this patch is merged. Note that this patch includes a minor cleanup to the use of __GFP_ZERO in shmem.c to keep all flag modifications to inode->mapping in the shmem_dir_alloc() helper function. This clean-up suggestion is courtesy of Hugh Dickens. Additional credit goes to Christoph Lameter and Linus Torvalds for shaping the concept. Credit to Hugh Dickens for catching issues with shmem swap vector and ramfs allocations. [akpm@linux-foundation.org: build fix] [hugh@veritas.com: __GFP_ZERO cleanup] Signed-off-by: Mel Gorman Cc: Andy Whitcroft Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 16 +++++++++++++++- include/linux/highmem.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0d2ef0b082a6..e5882fe49f83 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,6 +30,9 @@ struct vm_area_struct; * cannot handle allocation failures. * * __GFP_NORETRY: The VM implementation must not retry indefinitely. + * + * __GFP_MOVABLE: Flag that this page will be movable by the page migration + * mechanism or reclaimed */ #define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */ #define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */ @@ -45,6 +48,7 @@ struct vm_area_struct; #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ +#define __GFP_MOVABLE ((__force gfp_t)0x80000u) /* Page is movable */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) @@ -53,7 +57,8 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \ + __GFP_MOVABLE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) @@ -65,6 +70,15 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_HIGHMEM | \ + __GFP_MOVABLE) +#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) +#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_MOVABLE) +#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_HIGHMEM | \ + __GFP_MOVABLE) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 98e2cce996a4..12c5e4e3135a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -73,10 +73,27 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) } #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE +/** + * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags + * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA but the caller is expected + * to specify via movableflags whether the page will be movable in the + * future or not + * + * An architecture may override this function by defining + * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own + * implementation. + */ static inline struct page * -alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) +__alloc_zeroed_user_highpage(gfp_t movableflags, + struct vm_area_struct *vma, + unsigned long vaddr) { - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr); + struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, + vma, vaddr); if (page) clear_user_highpage(page, vaddr); @@ -85,6 +102,36 @@ alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) } #endif +/** + * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA that the caller knows will + * not be able to move in the future using move_pages() or reclaim. If it + * is known that the page can move, use alloc_zeroed_user_highpage_movable + */ +static inline struct page * +alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) +{ + return __alloc_zeroed_user_highpage(0, vma, vaddr); +} + +/** + * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move + * @vma: The VMA the page is to be allocated for + * @vaddr: The virtual address the page will be inserted into + * + * This function will allocate a page for a VMA that the caller knows will + * be able to migrate in the future using move_pages() or reclaimed + */ +static inline struct page * +alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, + unsigned long vaddr) +{ + return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); +} + static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page, KM_USER0); -- cgit v1.2.3 From 2a1e274acf0b1c192face19a4be7c12d4503eaaf Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 17 Jul 2007 04:03:12 -0700 Subject: Create the ZONE_MOVABLE zone The following 8 patches against 2.6.20-mm2 create a zone called ZONE_MOVABLE that is only usable by allocations that specify both __GFP_HIGHMEM and __GFP_MOVABLE. This has the effect of keeping all non-movable pages within a single memory partition while allowing movable allocations to be satisfied from either partition. The patches may be applied with the list-based anti-fragmentation patches that groups pages together based on mobility. The size of the zone is determined by a kernelcore= parameter specified at boot-time. This specifies how much memory is usable by non-movable allocations and the remainder is used for ZONE_MOVABLE. Any range of pages within ZONE_MOVABLE can be released by migrating the pages or by reclaiming. When selecting a zone to take pages from for ZONE_MOVABLE, there are two things to consider. First, only memory from the highest populated zone is used for ZONE_MOVABLE. On the x86, this is probably going to be ZONE_HIGHMEM but it would be ZONE_DMA on ppc64 or possibly ZONE_DMA32 on x86_64. Second, the amount of memory usable by the kernel will be spread evenly throughout NUMA nodes where possible. If the nodes are not of equal size, the amount of memory usable by the kernel on some nodes may be greater than others. By default, the zone is not as useful for hugetlb allocations because they are pinned and non-migratable (currently at least). A sysctl is provided that allows huge pages to be allocated from that zone. This means that the huge page pool can be resized to the size of ZONE_MOVABLE during the lifetime of the system assuming that pages are not mlocked. Despite huge pages being non-movable, we do not introduce additional external fragmentation of note as huge pages are always the largest contiguous block we care about. Credit goes to Andy Whitcroft for catching a large variety of problems during review of the patches. This patch creates an additional zone, ZONE_MOVABLE. This zone is only usable by allocations which specify both __GFP_HIGHMEM and __GFP_MOVABLE. Hot-added memory continues to be placed in their existing destination as there is no mechanism to redirect them to a specific zone. [y-goto@jp.fujitsu.com: Fix section mismatch of memory hotplug related code] [akpm@linux-foundation.org: various fixes] Signed-off-by: Mel Gorman Cc: Andy Whitcroft Signed-off-by: Yasunori Goto Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 +++ include/linux/mm.h | 1 + include/linux/mmzone.h | 20 ++++++++++++++++++-- include/linux/vmstat.h | 5 +++-- 4 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5882fe49f83..bc68dd9a6d41 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -106,6 +106,9 @@ static inline enum zone_type gfp_zone(gfp_t flags) if (flags & __GFP_DMA32) return ZONE_DMA32; #endif + if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) == + (__GFP_HIGHMEM | __GFP_MOVABLE)) + return ZONE_MOVABLE; #ifdef CONFIG_HIGHMEM if (flags & __GFP_HIGHMEM) return ZONE_HIGHMEM; diff --git a/include/linux/mm.h b/include/linux/mm.h index 97d0cddfd223..857e44817178 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,6 +1005,7 @@ extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); +extern int cmdline_parse_kernelcore(char *p); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 04b1636a970b..d71ff763c9df 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -146,6 +146,7 @@ enum zone_type { */ ZONE_HIGHMEM, #endif + ZONE_MOVABLE, MAX_NR_ZONES }; @@ -167,6 +168,7 @@ enum zone_type { + defined(CONFIG_ZONE_DMA32) \ + 1 \ + defined(CONFIG_HIGHMEM) \ + + 1 \ ) #if __ZONE_COUNT < 2 #define ZONES_SHIFT 0 @@ -499,10 +501,22 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } +extern int movable_zone; + +static inline int zone_movable_is_highmem(void) +{ +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_ARCH_POPULATES_NODE_MAP) + return movable_zone == ZONE_HIGHMEM; +#else + return 0; +#endif +} + static inline int is_highmem_idx(enum zone_type idx) { #ifdef CONFIG_HIGHMEM - return (idx == ZONE_HIGHMEM); + return (idx == ZONE_HIGHMEM || + (idx == ZONE_MOVABLE && zone_movable_is_highmem())); #else return 0; #endif @@ -522,7 +536,9 @@ static inline int is_normal_idx(enum zone_type idx) static inline int is_highmem(struct zone *zone) { #ifdef CONFIG_HIGHMEM - return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; + int zone_idx = zone - zone->zone_pgdat->node_zones; + return zone_idx == ZONE_HIGHMEM || + (zone_idx == ZONE_MOVABLE && zone_movable_is_highmem()); #else return 0; #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d9325cf8a134..75370ec0923e 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -25,7 +25,7 @@ #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), @@ -170,7 +170,8 @@ static inline unsigned long node_page_state(int node, #ifdef CONFIG_HIGHMEM zone_page_state(&zones[ZONE_HIGHMEM], item) + #endif - zone_page_state(&zones[ZONE_NORMAL], item); + zone_page_state(&zones[ZONE_NORMAL], item) + + zone_page_state(&zones[ZONE_MOVABLE], item); } extern void zone_statistics(struct zonelist *, struct zone *); -- cgit v1.2.3 From 396faf0303d273219db5d7eb4a2879ad977ed185 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 17 Jul 2007 04:03:13 -0700 Subject: Allow huge page allocations to use GFP_HIGH_MOVABLE Huge pages are not movable so are not allocated from ZONE_MOVABLE. However, as ZONE_MOVABLE will always have pages that can be migrated or reclaimed, it can be used to satisfy hugepage allocations even when the system has been running a long time. This allows an administrator to resize the hugepage pool at runtime depending on the size of ZONE_MOVABLE. This patch adds a new sysctl called hugepages_treat_as_movable. When a non-zero value is written to it, future allocations for the huge page pool will use ZONE_MOVABLE. Despite huge pages being non-movable, we do not introduce additional external fragmentation of note as huge pages are always the largest contiguous block we care about. [akpm@linux-foundation.org: various fixes] Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ include/linux/mempolicy.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2c13715e9dde..49b7053043ad 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -15,6 +15,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); @@ -29,6 +30,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); extern unsigned long max_huge_pages; +extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index daabb3aa1ec6..e147cf50529f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr); + unsigned long addr, gfp_t gfp_flags); extern unsigned slab_node(struct mempolicy *policy); extern enum zone_type policy_zone; @@ -256,9 +256,9 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p) #define set_cpuset_being_rebound(x) do {} while (0) static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, - unsigned long addr) + unsigned long addr, gfp_t gfp_flags) { - return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); + return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); } static inline int do_migrate_pages(struct mm_struct *mm, -- cgit v1.2.3 From ed7ed365172e27b0efe9d43cc962723c7193e34e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 17 Jul 2007 04:03:14 -0700 Subject: handle kernelcore=: generic This patch adds the kernelcore= parameter for x86. Once all patches are applied, a new command-line parameter exist and a new sysctl. This patch adds the necessary documentation. From: Yasunori Goto When "kernelcore" boot option is specified, kernel can't boot up on ia64 because of an infinite loop. In addition, the parsing code can be handled in an architecture-independent manner. This patch uses common code to handle the kernelcore= parameter. It is only available to architectures that support arch-independent zone-sizing (i.e. define CONFIG_ARCH_POPULATES_NODE_MAP). Other architectures will ignore the boot parameter. [bunk@stusta.de: make cmdline_parse_kernelcore() static] Signed-off-by: Mel Gorman Signed-off-by: Yasunori Goto Acked-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 857e44817178..97d0cddfd223 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,7 +1005,6 @@ extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid); -extern int cmdline_parse_kernelcore(char *p); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ -- cgit v1.2.3 From 5ad333eb66ff1e52a87639822ae088577669dcf9 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 17 Jul 2007 04:03:16 -0700 Subject: Lumpy Reclaim V4 When we are out of memory of a suitable size we enter reclaim. The current reclaim algorithm targets pages in LRU order, which is great for fairness at order-0 but highly unsuitable if you desire pages at higher orders. To get pages of higher order we must shoot down a very high proportion of memory; >95% in a lot of cases. This patch set adds a lumpy reclaim algorithm to the allocator. It targets groups of pages at the specified order anchored at the end of the active and inactive lists. This encourages groups of pages at the requested orders to move from active to inactive, and active to free lists. This behaviour is only triggered out of direct reclaim when higher order pages have been requested. This patch set is particularly effective when utilised with an anti-fragmentation scheme which groups pages of similar reclaimability together. This patch set is based on Peter Zijlstra's lumpy reclaim V2 patch which forms the foundation. Credit to Mel Gorman for sanitity checking. Mel said: The patches have an application with hugepage pool resizing. When lumpy-reclaim is used used with ZONE_MOVABLE, the hugepages pool can be resized with greater reliability. Testing on a desktop machine with 2GB of RAM showed that growing the hugepage pool with ZONE_MOVABLE on it's own was very slow as the success rate was quite low. Without lumpy-reclaim, each attempt to grow the pool by 100 pages would yield 1 or 2 hugepages. With lumpy-reclaim, getting 40 to 70 hugepages on each attempt was typical. [akpm@osdl.org: ia64 pfn_to_nid fixes and loop cleanup] [bunk@stusta.de: static declarations for internal functions] [a.p.zijlstra@chello.nl: initial lumpy V2 implementation] Signed-off-by: Andy Whitcroft Acked-by: Peter Zijlstra Acked-by: Mel Gorman Acked-by: Mel Gorman Cc: Bob Picco Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 ++++++++ include/linux/swap.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d71ff763c9df..da8eb8ad9e9b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -24,6 +24,14 @@ #endif #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) +/* + * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed + * costly to service. That is between allocation orders which should + * coelesce naturally under reasonable reclaim pressure and those which + * will not. + */ +#define PAGE_ALLOC_COSTLY_ORDER 3 + struct free_area { struct list_head free_list; unsigned long nr_free; diff --git a/include/linux/swap.h b/include/linux/swap.h index 006868881346..665f85f2a3af 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -188,7 +188,8 @@ extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); /* linux/mm/vmscan.c */ -extern unsigned long try_to_free_pages(struct zone **, gfp_t); +extern unsigned long try_to_free_pages(struct zone **zones, int order, + gfp_t gfp_mask); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); -- cgit v1.2.3 From 8e1f936b73150f5095448a0fee6d4f30a1f9001d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 04:03:17 -0700 Subject: mm: clean up and kernelify shrinker registration I can never remember what the function to register to receive VM pressure is called. I have to trace down from __alloc_pages() to find it. It's called "set_shrinker()", and it needs Your Help. 1) Don't hide struct shrinker. It contains no magic. 2) Don't allocate "struct shrinker". It's not helpful. 3) Call them "register_shrinker" and "unregister_shrinker". 4) Call the function "shrink" not "shrinker". 5) Reduce the 17 lines of waffly comments to 13, but document it properly. Signed-off-by: Rusty Russell Cc: David Chinner Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 97d0cddfd223..4c482a3ee870 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -810,27 +810,31 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long flags, unsigned long new_addr); /* - * Prototype to add a shrinker callback for ageable caches. - * - * These functions are passed a count `nr_to_scan' and a gfpmask. They should - * scan `nr_to_scan' objects, attempting to free them. + * A callback you can register to apply pressure to ageable caches. * - * The callback must return the number of objects which remain in the cache. + * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should + * look through the least-recently-used 'nr_to_scan' entries and + * attempt to free them up. It should return the number of objects + * which remain in the cache. If it returns -1, it means it cannot do + * any scanning at this time (eg. there is a risk of deadlock). * - * The callback will be passed nr_to_scan == 0 when the VM is querying the - * cache size, so a fastpath for that case is appropriate. - */ -typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask); - -/* - * Add an aging callback. The int is the number of 'seeks' it takes - * to recreate one of the objects that these functions age. + * The 'gfpmask' refers to the allocation we are currently trying to + * fulfil. + * + * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is + * querying the cache size, so a fastpath for that case is appropriate. */ +struct shrinker { + int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int seeks; /* seeks to recreate an obj */ -#define DEFAULT_SEEKS 2 -struct shrinker; -extern struct shrinker *set_shrinker(int, shrinker_t); -extern void remove_shrinker(struct shrinker *shrinker); + /* These are for internal use */ + struct list_head list; + long nr; /* objs pending delete */ +}; +#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ +extern void register_shrinker(struct shrinker *); +extern void unregister_shrinker(struct shrinker *); /* * Some shared mappigns will want the pages marked read-only -- cgit v1.2.3 From 6cb8f91320d3e720351c21741da795fed580b21b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Jul 2007 04:03:22 -0700 Subject: Slab allocators: consistent ZERO_SIZE_PTR support and NULL result semantics Define ZERO_OR_NULL_PTR macro to be able to remove the checks from the allocators. Move ZERO_SIZE_PTR related stuff into slab.h. Make ZERO_SIZE_PTR work for all slab allocators and get rid of the WARN_ON_ONCE(size == 0) that is still remaining in SLAB. Make slub return NULL like the other allocators if a too large memory segment is requested via __kmalloc. Signed-off-by: Christoph Lameter Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 13 +++++++++++++ include/linux/slab_def.h | 12 ++++++++++++ include/linux/slub_def.h | 12 ------------ 3 files changed, 25 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 27402fea9b79..0289ec89300a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -30,6 +30,19 @@ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ +/* + * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. + * + * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. + * + * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. + * Both make kfree a no-op. + */ +#define ZERO_SIZE_PTR ((void *)16) + +#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) < \ + (unsigned long)ZERO_SIZE_PTR) + /* * struct kmem_cache related prototypes */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 365d036c454a..16e814ffab8d 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -32,6 +32,10 @@ static inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; + + if (!size) + return ZERO_SIZE_PTR; + #define CACHE(x) \ if (size <= x) \ goto found; \ @@ -58,6 +62,10 @@ static inline void *kzalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { int i = 0; + + if (!size) + return ZERO_SIZE_PTR; + #define CACHE(x) \ if (size <= x) \ goto found; \ @@ -88,6 +96,10 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size)) { int i = 0; + + if (!size) + return ZERO_SIZE_PTR; + #define CACHE(x) \ if (size <= x) \ goto found; \ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a582f6771525..579b0a22858e 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -159,18 +159,6 @@ static inline struct kmem_cache *kmalloc_slab(size_t size) #define SLUB_DMA 0 #endif - -/* - * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. - * - * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. - * - * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can. - * Both make kfree a no-op. - */ -#define ZERO_SIZE_PTR ((void *)16) - - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); -- cgit v1.2.3 From 0c710013200e72b5e0bc680ff4ec6bdac53c5ce8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Jul 2007 04:03:24 -0700 Subject: SLUB: add some more inlines and #ifdef CONFIG_SLUB_DEBUG Add #ifdefs around data structures only needed if debugging is compiled into SLUB. Add inlines to small functions to reduce code size. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 579b0a22858e..bae11111458f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -16,7 +16,9 @@ struct kmem_cache_node { unsigned long nr_partial; atomic_long_t nr_slabs; struct list_head partial; +#ifdef CONFIG_SLUB_DEBUG struct list_head full; +#endif }; /* @@ -44,7 +46,9 @@ struct kmem_cache { int align; /* Alignment */ const char *name; /* Name (only for display!) */ struct list_head list; /* List of slab caches */ +#ifdef CONFIG_SLUB_DEBUG struct kobject kobj; /* For sysfs */ +#endif #ifdef CONFIG_NUMA int defrag_ratio; -- cgit v1.2.3 From 81cda6626178cd55297831296ba8ecedbfd8b52d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Jul 2007 04:03:29 -0700 Subject: Slab allocators: Cleanup zeroing allocations It becomes now easy to support the zeroing allocs with generic inline functions in slab.h. Provide inline definitions to allow the continued use of kzalloc, kmem_cache_zalloc etc but remove other definitions of zeroing functions from the slab allocators and util.c. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 77 +++++++++++++++++++++++++++++------------------- include/linux/slab_def.h | 30 ------------------- include/linux/slub_def.h | 13 -------- 3 files changed, 46 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 0289ec89300a..0e1d0daef6a2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -55,7 +55,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, void (*)(void *, struct kmem_cache *, unsigned long)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); -void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); void kmem_cache_free(struct kmem_cache *, void *); unsigned int kmem_cache_size(struct kmem_cache *); const char *kmem_cache_name(struct kmem_cache *); @@ -91,11 +90,37 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); /* * Common kmalloc functions provided by all allocators */ -void *__kzalloc(size_t, gfp_t); void * __must_check krealloc(const void *, size_t, gfp_t); void kfree(const void *); size_t ksize(const void *); +/* + * Allocator specific definitions. These are mainly used to establish optimized + * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by + * selecting the appropriate general cache at compile time. + * + * Allocators must define at least: + * + * kmem_cache_alloc() + * __kmalloc() + * kmalloc() + * + * Those wishing to support NUMA must also define: + * + * kmem_cache_alloc_node() + * kmalloc_node() + * + * See each allocator definition file for additional comments and + * implementation notes. + */ +#ifdef CONFIG_SLUB +#include +#elif defined(CONFIG_SLOB) +#include +#else +#include +#endif + /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. @@ -151,37 +176,9 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) { if (n != 0 && size > ULONG_MAX / n) return NULL; - return __kzalloc(n * size, flags); + return __kmalloc(n * size, flags | __GFP_ZERO); } -/* - * Allocator specific definitions. These are mainly used to establish optimized - * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by - * selecting the appropriate general cache at compile time. - * - * Allocators must define at least: - * - * kmem_cache_alloc() - * __kmalloc() - * kmalloc() - * kzalloc() - * - * Those wishing to support NUMA must also define: - * - * kmem_cache_alloc_node() - * kmalloc_node() - * - * See each allocator definition file for additional comments and - * implementation notes. - */ -#ifdef CONFIG_SLUB -#include -#elif defined(CONFIG_SLOB) -#include -#else -#include -#endif - #if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB) /** * kmalloc_node - allocate memory from a specific node @@ -255,5 +252,23 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); #endif /* DEBUG_SLAB */ +/* + * Shortcuts + */ +static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) +{ + return kmem_cache_alloc(k, flags | __GFP_ZERO); +} + +/** + * kzalloc - allocate memory. The memory is set to zero. + * @size: how many bytes of memory are required. + * @flags: the type of memory to allocate (see kmalloc). + */ +static inline void *kzalloc(size_t size, gfp_t flags) +{ + return kmalloc(size, flags | __GFP_ZERO); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 16e814ffab8d..32bdc2ffd715 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -58,36 +58,6 @@ found: return __kmalloc(size, flags); } -static inline void *kzalloc(size_t size, gfp_t flags) -{ - if (__builtin_constant_p(size)) { - int i = 0; - - if (!size) - return ZERO_SIZE_PTR; - -#define CACHE(x) \ - if (size <= x) \ - goto found; \ - else \ - i++; -#include "kmalloc_sizes.h" -#undef CACHE - { - extern void __you_cannot_kzalloc_that_much(void); - __you_cannot_kzalloc_that_much(); - } -found: -#ifdef CONFIG_ZONE_DMA - if (flags & GFP_DMA) - return kmem_cache_zalloc(malloc_sizes[i].cs_dmacachep, - flags); -#endif - return kmem_cache_zalloc(malloc_sizes[i].cs_cachep, flags); - } - return __kzalloc(size, flags); -} - #ifdef CONFIG_NUMA extern void *__kmalloc_node(size_t size, gfp_t flags, int node); extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index bae11111458f..07f7e4cbcee3 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -179,19 +179,6 @@ static inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -static inline void *kzalloc(size_t size, gfp_t flags) -{ - if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { - struct kmem_cache *s = kmalloc_slab(size); - - if (!s) - return ZERO_SIZE_PTR; - - return kmem_cache_zalloc(s, flags); - } else - return __kzalloc(size, flags); -} - #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); -- cgit v1.2.3 From b5fab14e5d87df4d94161ae5f5e0c8625f9ffda2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Jul 2007 04:03:33 -0700 Subject: Add VM_BUG_ON in case someone uses page_mapping on a slab page Detect slab objects being passed to the page oriented functions of the VM. It is not sufficient to simply return NULL because the functions calling page_mapping may depend on other items of the page_struct also to be setup properly. Moreover slab object may not be properly aligned. The page oriented functions of the VM expect to operate on page aligned, page sized objects. Operations on object straddling page boundaries may only affect the objects partially which may lead to surprising results. It is better to detect eventually remaining uses and eliminate them. Signed-off-by: Christoph Lameter Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4c482a3ee870..a5c451816fdc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -599,6 +599,7 @@ static inline struct address_space *page_mapping(struct page *page) { struct address_space *mapping = page->mapping; + VM_BUG_ON(PageSlab(page)); if (unlikely(PageSwapCache(page))) mapping = &swapper_space; #ifdef CONFIG_SLUB -- cgit v1.2.3 From 831441862956fffa17b9801db37e6ea1650b0f69 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 17 Jul 2007 04:03:35 -0700 Subject: Freezer: make kernel threads nonfreezable by default Currently, the freezer treats all tasks as freezable, except for the kernel threads that explicitly set the PF_NOFREEZE flag for themselves. This approach is problematic, since it requires every kernel thread to either set PF_NOFREEZE explicitly, or call try_to_freeze(), even if it doesn't care for the freezing of tasks at all. It seems better to only require the kernel threads that want to or need to be frozen to use some freezer-related code and to remove any freezer-related code from the other (nonfreezable) kernel threads, which is done in this patch. The patch causes all kernel threads to be nonfreezable by default (ie. to have PF_NOFREEZE set by default) and introduces the set_freezable() function that should be called by the freezable kernel threads in order to unset PF_NOFREEZE. It also makes all of the currently freezable kernel threads call set_freezable(), so it shouldn't cause any (intentional) change of behaviour to appear. Additionally, it updates documentation to describe the freezing of tasks more accurately. [akpm@linux-foundation.org: build fixes] Signed-off-by: Rafael J. Wysocki Acked-by: Nigel Cunningham Cc: Pavel Machek Cc: Oleg Nesterov Cc: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 4631086f5060..2d38b1a74662 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -1,5 +1,8 @@ /* Freezer declarations */ +#ifndef FREEZER_H_INCLUDED +#define FREEZER_H_INCLUDED + #include #ifdef CONFIG_PM @@ -115,6 +118,14 @@ static inline int freezer_should_skip(struct task_struct *p) return !!(p->flags & PF_FREEZER_SKIP); } +/* + * Tell the freezer that the current task should be frozen by it + */ +static inline void set_freezable(void) +{ + current->flags &= ~PF_NOFREEZE; +} + #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } @@ -130,4 +141,7 @@ static inline int try_to_freeze(void) { return 0; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } +static inline void set_freezable(void) {} #endif + +#endif /* FREEZER_H_INCLUDED */ -- cgit v1.2.3 From 2a7326b5bbafac4c96bcdb944b2a773593030b96 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 17 Jul 2007 04:03:37 -0700 Subject: CONFIG_BOUNCE to avoid useless inclusion of bounce buffer logic The bounce buffer logic is included on systems that do not need it. If a system does not have zones like ZONE_DMA and ZONE_HIGHMEM that can lead to the use of bounce buffers then there is no need to reserve memory pools etc etc. This is true f.e. for SGI Altix. Also nicifies the Makefile and gets rid of the tricky "and" there. Signed-off-by: Christoph Lameter Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b32564a1e105..f78965fc6426 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -624,7 +624,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; */ #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) -#ifdef CONFIG_MMU +#ifdef CONFIG_BOUNCE extern int init_emergency_isa_pool(void); extern void blk_queue_bounce(request_queue_t *q, struct bio **bio); #else -- cgit v1.2.3 From bcdcd8e725b923ad7c0de809680d5d5658a7bf8c Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Tue, 17 Jul 2007 04:03:42 -0700 Subject: Report that kernel is tainted if there was an OOPS If the kernel OOPSed or BUGed then it probably should be considered as tainted. Thus, all subsequent OOPSes and SysRq dumps will report the tainted kernel. This saves a lot of time explaining oddities in the calltraces. Signed-off-by: Pavel Emelianov Acked-by: Randy Dunlap Cc: Signed-off-by: Andrew Morton [ Added parisc patch from Matthew Wilson -Linus ] Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 7a4852505914..1eb9cde550c4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -210,6 +210,7 @@ extern enum system_states { #define TAINT_MACHINE_CHECK (1<<4) #define TAINT_BAD_PAGE (1<<5) #define TAINT_USER (1<<6) +#define TAINT_DIE (1<<7) extern void dump_stack(void); -- cgit v1.2.3 From 7664732315c97f48dba9d1e7339ad16fc5a320ac Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 17 Jul 2007 04:03:43 -0700 Subject: PTRACE_PEEKDATA consolidation Identical implementations of PTRACE_PEEKDATA go into generic_ptrace_peekdata() function. Signed-off-by: Alexey Dobriyan Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eeb1976ef7bf..477cc8ed6bcb 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -110,6 +110,7 @@ static inline void ptrace_unlink(struct task_struct *child) __ptrace_unlink(child); } +int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data); #ifndef force_successful_syscall_return /* -- cgit v1.2.3 From f284ce7269031947326bac6bb19a977705276222 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 17 Jul 2007 04:03:44 -0700 Subject: PTRACE_POKEDATA consolidation Identical implementations of PTRACE_POKEDATA go into generic_ptrace_pokedata() function. AFAICS, fix bug on xtensa where successful PTRACE_POKEDATA will nevertheless return EPERM. Signed-off-by: Alexey Dobriyan Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 477cc8ed6bcb..ae8146abd746 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -111,6 +111,7 @@ static inline void ptrace_unlink(struct task_struct *child) } int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data); +int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data); #ifndef force_successful_syscall_return /* -- cgit v1.2.3 From 62239ac2b301abc397e70986649666cfb7835907 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 17 Jul 2007 04:03:45 -0700 Subject: proper prototype for proc_nr_files() Add a proper prototype for proc_nr_files() in include/linux/fs.h Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e68780810279..b3a9f0db9d80 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -283,6 +283,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -2050,5 +2051,9 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From f4480240f700587c15507b7815e75989b16825b2 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 17 Jul 2007 04:03:47 -0700 Subject: unregister_blkdev(): return void Put WARN_ON and fixed all callers of unregister_blkdev(). Now we can make unregister_blkdev return void. Cc: Jens Axboe Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b3a9f0db9d80..aa74f7de9dcd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1527,7 +1527,7 @@ extern void putname(const char *name); #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); -extern int unregister_blkdev(unsigned int, const char *); +extern void unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); -- cgit v1.2.3 From 77293034696e3e0b6c8b8fc1f96be091104b3d2b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 17 Jul 2007 04:03:49 -0700 Subject: Remove OPEN_MAX The OPEN_MAX macro in limits.h should not be there. It claims to be the limit on file descriptors in a process, but its value is wrong for that. There is no constant value, but a variable resource limit (RLIMIT_NOFILE). Nothing in the kernel uses OPEN_MAX except things that are wrong to do so. I've submitted other patches to remove those uses. The proper thing to do according to POSIX is not to define OPEN_MAX at all. The sysconf (_SC_OPEN_MAX) implementation works by calling getrlimit. Signed-off-by: Roland McGrath Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/limits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/limits.h b/include/linux/limits.h index eaf2e099f125..c4b4e579c01d 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -6,7 +6,6 @@ #define NGROUPS_MAX 65536 /* supplemental group IDs are available */ #define ARG_MAX 131072 /* # bytes of args + environ for exec() */ #define CHILD_MAX 999 /* no limit :-) */ -#define OPEN_MAX 256 /* # open files a process may have */ #define LINK_MAX 127 /* # links a file may have */ #define MAX_CANON 255 /* size of the canonical input queue */ #define MAX_INPUT 255 /* size of the type-ahead buffer */ -- cgit v1.2.3 From f9e86f419073605b4520848021cc042963c227c7 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 17 Jul 2007 04:03:49 -0700 Subject: Remove CHILD_MAX The CHILD_MAX macro in limits.h should not be there. It claims to be the limit on processes a user can own, but its value is wrong for that. There is no constant value, but a variable resource limit (RLIMIT_NPROC). Nothing in the kernel uses CHILD_MAX. The proper thing to do according to POSIX is not to define CHILD_MAX at all. The sysconf (_SC_CHILD_MAX) implementation works by calling getrlimit. Signed-off-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/limits.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/limits.h b/include/linux/limits.h index c4b4e579c01d..2d0f94162fb3 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -5,7 +5,6 @@ #define NGROUPS_MAX 65536 /* supplemental group IDs are available */ #define ARG_MAX 131072 /* # bytes of args + environ for exec() */ -#define CHILD_MAX 999 /* no limit :-) */ #define LINK_MAX 127 /* # links a file may have */ #define MAX_CANON 255 /* size of the canonical input queue */ #define MAX_INPUT 255 /* size of the type-ahead buffer */ -- cgit v1.2.3 From b45d52797432bd6b5d9786dbda940eb8d0b9ed06 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 17 Jul 2007 04:03:50 -0700 Subject: sb1250-duart.c: SB1250 DUART serial support This is a driver for the SB1250 DUART, a dual serial port implementation included in the Broadcom family of SOCs descending from the SiByte SB1250 MIPS64 chip multiprocessor. It is a new implementation replacing the old-fashioned driver currently present in the linux-mips.org tree. It supports all the usual features one would expect from a(n asynchronous) serial driver, including modem line control (as far as hardware supports it -- there is edge detection logic missing from the DCD and RI lines and the driver does not implement polling of these lines at the moment), the serial console, BREAK transmission and reception, including the magic SysRq. The receive FIFO threshold is not maintained though. The driver was tested with a SWARM board which uses a BCM1250 SOC (which is dual MIPS64 CMP) and has both ports of the single DUART implemented wired externally. Both were tested. Testing included using the ports as terminal lines at 1200bps (which is the ports minimum), 115200bps and a couple of random speeds inbetween. The modem lines were verified to operate correctly. No testing was performed with a use as a network interface, like with SLIP or PPP. Signed-off-by: Maciej W. Rozycki Acked-by: Ralf Baechle Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 7f2c99d66e9d..9c721cd2c9d6 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -142,6 +142,9 @@ /* Micrel KS8695 */ #define PORT_KS8695 76 +/* Broadcom SB1250, etc. SOC */ +#define PORT_SB1250_DUART 77 + #ifdef __KERNEL__ -- cgit v1.2.3 From 9281acea6a3687ff0f262e0be31eac34895b95d7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jul 2007 04:03:51 -0700 Subject: kallsyms: make KSYM_NAME_LEN include space for trailing '\0' KSYM_NAME_LEN is peculiar in that it does not include the space for the trailing '\0', forcing all users to use KSYM_NAME_LEN + 1 when allocating buffer. This is nonsense and error-prone. Moreover, when the caller forgets that it's very likely to subtly bite back by corrupting the stack because the last position of the buffer is always cleared to zero. This patch increments KSYM_NAME_LEN by one and updates code accordingly. * off-by-one bug in asm-powerpc/kprobes.h::kprobe_lookup_name() macro is fixed. * Where MODULE_NAME_LEN and KSYM_NAME_LEN were used together, MODULE_NAME_LEN was treated as if it didn't include space for the trailing '\0'. Fix it. Signed-off-by: Tejun Heo Acked-by: Paulo Marques Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 5f06527dca21..f73de6fb5c68 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -7,9 +7,9 @@ #include -#define KSYM_NAME_LEN 127 -#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + KSYM_NAME_LEN + \ - 2*(BITS_PER_LONG*3/10) + MODULE_NAME_LEN + 1) +#define KSYM_NAME_LEN 128 +#define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ + 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) #ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. */ -- cgit v1.2.3 From 5b78cc9ac8602baafebb75a09025ffb17d1aebc2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 17 Jul 2007 04:03:53 -0700 Subject: make timespec_equal() take const arguments Make arguments of timespec_equal() const struct timespec. Signed-off-by: Jan Engelhardt Cc: Thomas Gleixner Cc: john stultz Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 4bb05a829be9..ec3b0ced0afe 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -36,7 +36,8 @@ struct timezone { #define NSEC_PER_SEC 1000000000L #define FSEC_PER_SEC 1000000000000000L -static inline int timespec_equal(struct timespec *a, struct timespec *b) +static inline int timespec_equal(const struct timespec *a, + const struct timespec *b) { return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } -- cgit v1.2.3 From dccd573bb02aa011a4a7146c02c409ac0bd722a0 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 17 Jul 2007 04:04:02 -0700 Subject: SPI controller drivers: check for unsupported modes Minor SPI controller driver updates: make the setup() methods reject spi->mode bits they don't support, by masking aginst the inverse of bits they *do* support. This insures against misbehavior later when new mode bits get added. Most controllers can't support SPI_LSB_FIRST; more handle SPI_CS_HIGH. Support for all four SPI clock/transfer modes is routine. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi_bitbang.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 9dbca629dcfb..b8db32cea1de 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -26,6 +26,7 @@ struct spi_bitbang { struct list_head queue; u8 busy; u8 use_dma; + u8 flags; /* extra spi->mode support */ struct spi_master *master; -- cgit v1.2.3 From c06e677aed0c86480b01faa894967daa8aa3568a Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 17 Jul 2007 04:04:03 -0700 Subject: SPI: add 3wire mode flag Add a new spi->mode bit: SPI_3WIRE, for chips where the SI and SO signals are shared (and which are thus only half duplex). Update the LM70 driver to require support for that hardware mode from the controller. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 1be5ea059477..302b81d1d117 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -76,6 +76,7 @@ struct spi_device { #define SPI_MODE_3 (SPI_CPOL|SPI_CPHA) #define SPI_CS_HIGH 0x04 /* chipselect active high? */ #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ +#define SPI_3WIRE 0x10 /* SI/SO signals shared */ u8 bits_per_word; int irq; void *controller_state; -- cgit v1.2.3 From ad241528c4919505afccb022acbab3eeb0db4d80 Mon Sep 17 00:00:00 2001 From: Jan Nikitenko Date: Tue, 17 Jul 2007 04:04:03 -0700 Subject: CRC7 support Add CRC7 routines, used for example in MMC over SPI communication. Kerneldoc updates [akpm@linux-foundation.org: fix funny mix of const and non-const] Signed-off-by: Jan Nikitenko Signed-off-by: David Brownell Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crc7.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/crc7.h (limited to 'include/linux') diff --git a/include/linux/crc7.h b/include/linux/crc7.h new file mode 100644 index 000000000000..1786e772d5c6 --- /dev/null +++ b/include/linux/crc7.h @@ -0,0 +1,14 @@ +#ifndef _LINUX_CRC7_H +#define _LINUX_CRC7_H +#include + +extern const u8 crc7_syndrome_table[256]; + +static inline u8 crc7_byte(u8 crc, u8 data) +{ + return crc7_syndrome_table[(crc << 1) ^ data]; +} + +extern u8 crc7(u8 crc, const u8 *buffer, size_t len); + +#endif -- cgit v1.2.3 From 447aef1a19135a69bfd725c33f7e753740cb8447 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 17 Jul 2007 04:04:10 -0700 Subject: SPI: tle620x power switch driver Add support for the Infineon TLE62x0 series of low-side driver chips, such as the TLE6220 or TLE6230. These can be viewed as output GPIOs specialized for power switching applications. The driver provides a userspace interface to those GPIOs, and to the switch status they provide. Signed-off-by: Ben Dooks Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spi/tle62x0.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 include/linux/spi/tle62x0.h (limited to 'include/linux') diff --git a/include/linux/spi/tle62x0.h b/include/linux/spi/tle62x0.h new file mode 100644 index 000000000000..60b59187e590 --- /dev/null +++ b/include/linux/spi/tle62x0.h @@ -0,0 +1,24 @@ +/* + * tle62x0.h - platform glue to Infineon TLE62x0 driver chips + * + * Copyright 2007 Simtec Electronics + * Ben Dooks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +struct tle62x0_pdata { + unsigned int init_state; + unsigned int gpio_count; +}; -- cgit v1.2.3 From f29ba280ecb46331c1f6842b094808af01131422 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 17 Jul 2007 04:04:12 -0700 Subject: spi_mpc83xx.c: support QE enabled 83xx CPU's like mpc832x Quicc Engine enabled mpc83xx CPU's has a somewhat different HW interface to the SPI controller. This patch adds a qe_mode knob that sees to that needed adaptions are performed. Signed-off-by: Joakim Tjernlund Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fsl_devices.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 12e631f0fb77..695741b0e420 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -112,7 +112,7 @@ struct fsl_usb2_platform_data { struct fsl_spi_platform_data { u32 initial_spmode; /* initial SPMODE value */ u16 bus_num; - + bool qe_mode; /* board specific information */ u16 max_chipselect; void (*activate_cs)(u8 cs, u8 polarity); -- cgit v1.2.3 From 67837f232d6d55be99d6e0dec4ea9bb8112840cd Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 17 Jul 2007 04:04:16 -0700 Subject: Use mutex instead of semaphore in CAPI 2.0 driver The CAPI 2.0 driver uses a semaphore as mutex. Use the mutex API instead of the (binary) semaphore. Signed-off-by: Matthias Kaehlcke Acked-by: Karsten Keil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernelcapi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernelcapi.h b/include/linux/kernelcapi.h index aea34e74c496..8c4350a9ed87 100644 --- a/include/linux/kernelcapi.h +++ b/include/linux/kernelcapi.h @@ -64,7 +64,7 @@ struct capi20_appl { unsigned long nrecvdatapkt; unsigned long nsentctlpkt; unsigned long nsentdatapkt; - struct semaphore recv_sem; + struct mutex recv_mtx; struct sk_buff_head recv_queue; struct work_struct recv_work; int release_in_progress; -- cgit v1.2.3 From a569425512253992cc64ebf8b6d00a62f986db3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Jul 2007 04:04:28 -0700 Subject: knfsd: exportfs: add exportfs.h header currently the export_operation structure and helpers related to it are in fs.h. fs.h is already far too large and there are very few places needing the export bits, so split them off into a separate header. [akpm@linux-foundation.org: fix cifs build] Signed-off-by: Christoph Hellwig Signed-off-by: Neil Brown Cc: Steven French Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/exportfs.h | 119 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 114 +-------------------------------------------- 2 files changed, 120 insertions(+), 113 deletions(-) create mode 100644 include/linux/exportfs.h (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h new file mode 100644 index 000000000000..fdc306fbba50 --- /dev/null +++ b/include/linux/exportfs.h @@ -0,0 +1,119 @@ +#ifndef LINUX_EXPORTFS_H +#define LINUX_EXPORTFS_H 1 + +#include + +struct dentry; +struct super_block; + + +/** + * struct export_operations - for nfsd to communicate with file systems + * @decode_fh: decode a file handle fragment and return a &struct dentry + * @encode_fh: encode a file handle fragment from a dentry + * @get_name: find the name for a given inode in a given directory + * @get_parent: find the parent of a given directory + * @get_dentry: find a dentry for the inode given a file handle sub-fragment + * @find_exported_dentry: + * set by the exporting module to a standard helper function. + * + * Description: + * The export_operations structure provides a means for nfsd to communicate + * with a particular exported file system - particularly enabling nfsd and + * the filesystem to co-operate when dealing with file handles. + * + * export_operations contains two basic operation for dealing with file + * handles, decode_fh() and encode_fh(), and allows for some other + * operations to be defined which standard helper routines use to get + * specific information from the filesystem. + * + * nfsd encodes information use to determine which filesystem a filehandle + * applies to in the initial part of the file handle. The remainder, termed + * a file handle fragment, is controlled completely by the filesystem. The + * standard helper routines assume that this fragment will contain one or + * two sub-fragments, one which identifies the file, and one which may be + * used to identify the (a) directory containing the file. + * + * In some situations, nfsd needs to get a dentry which is connected into a + * specific part of the file tree. To allow for this, it passes the + * function acceptable() together with a @context which can be used to see + * if the dentry is acceptable. As there can be multiple dentrys for a + * given file, the filesystem should check each one for acceptability before + * looking for the next. As soon as an acceptable one is found, it should + * be returned. + * + * decode_fh: + * @decode_fh is given a &struct super_block (@sb), a file handle fragment + * (@fh, @fh_len) and an acceptability testing function (@acceptable, + * @context). It should return a &struct dentry which refers to the same + * file that the file handle fragment refers to, and which passes the + * acceptability test. If it cannot, it should return a %NULL pointer if + * the file was found but no acceptable &dentries were available, or a + * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or + * %ENOMEM). + * + * encode_fh: + * @encode_fh should store in the file handle fragment @fh (using at most + * @max_len bytes) information that can be used by @decode_fh to recover the + * file refered to by the &struct dentry @de. If the @connectable flag is + * set, the encode_fh() should store sufficient information so that a good + * attempt can be made to find not only the file but also it's place in the + * filesystem. This typically means storing a reference to de->d_parent in + * the filehandle fragment. encode_fh() should return the number of bytes + * stored or a negative error code such as %-ENOSPC + * + * get_name: + * @get_name should find a name for the given @child in the given @parent + * directory. The name should be stored in the @name (with the + * understanding that it is already pointing to a a %NAME_MAX+1 sized + * buffer. get_name() should return %0 on success, a negative error code + * or error. @get_name will be called without @parent->i_mutex held. + * + * get_parent: + * @get_parent should find the parent directory for the given @child which + * is also a directory. In the event that it cannot be found, or storage + * space cannot be allocated, a %ERR_PTR should be returned. + * + * get_dentry: + * Given a &super_block (@sb) and a pointer to a file-system specific inode + * identifier, possibly an inode number, (@inump) get_dentry() should find + * the identified inode and return a dentry for that inode. Any suitable + * dentry can be returned including, if necessary, a new dentry created with + * d_alloc_root. The caller can then find any other extant dentrys by + * following the d_alias links. If a new dentry was created using + * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry + * should be d_rehash()ed. + * + * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code + * can be returned. The @inump will be whatever was passed to + * nfsd_find_fh_dentry() in either the @obj or @parent parameters. + * + * Locking rules: + * get_parent is called with child->d_inode->i_mutex down + * get_name is not (which is possibly inconsistent) + */ + +struct export_operations { + struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, + int fh_len, int fh_type, + int (*acceptable)(void *context, struct dentry *de), + void *context); + int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, + int connectable); + int (*get_name)(struct dentry *parent, char *name, + struct dentry *child); + struct dentry * (*get_parent)(struct dentry *child); + struct dentry * (*get_dentry)(struct super_block *sb, void *inump); + + /* This is set by the exporting module to a standard helper */ + struct dentry * (*find_exported_dentry)( + struct super_block *sb, void *obj, void *parent, + int (*acceptable)(void *context, struct dentry *de), + void *context); +}; + +extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj, + void *parent, int (*acceptable)(void *context, struct dentry *de), + void *context); + +#endif /* LINUX_EXPORTFS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index aa74f7de9dcd..58ce336d4a6b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -289,6 +289,7 @@ extern int dir_notify_enable; #include #include +struct export_operations; struct hd_geometry; struct iovec; struct nameidata; @@ -1278,119 +1279,6 @@ static inline void file_accessed(struct file *file) int sync_inode(struct inode *inode, struct writeback_control *wbc); -/** - * struct export_operations - for nfsd to communicate with file systems - * @decode_fh: decode a file handle fragment and return a &struct dentry - * @encode_fh: encode a file handle fragment from a dentry - * @get_name: find the name for a given inode in a given directory - * @get_parent: find the parent of a given directory - * @get_dentry: find a dentry for the inode given a file handle sub-fragment - * @find_exported_dentry: - * set by the exporting module to a standard helper function. - * - * Description: - * The export_operations structure provides a means for nfsd to communicate - * with a particular exported file system - particularly enabling nfsd and - * the filesystem to co-operate when dealing with file handles. - * - * export_operations contains two basic operation for dealing with file - * handles, decode_fh() and encode_fh(), and allows for some other - * operations to be defined which standard helper routines use to get - * specific information from the filesystem. - * - * nfsd encodes information use to determine which filesystem a filehandle - * applies to in the initial part of the file handle. The remainder, termed - * a file handle fragment, is controlled completely by the filesystem. The - * standard helper routines assume that this fragment will contain one or - * two sub-fragments, one which identifies the file, and one which may be - * used to identify the (a) directory containing the file. - * - * In some situations, nfsd needs to get a dentry which is connected into a - * specific part of the file tree. To allow for this, it passes the - * function acceptable() together with a @context which can be used to see - * if the dentry is acceptable. As there can be multiple dentrys for a - * given file, the filesystem should check each one for acceptability before - * looking for the next. As soon as an acceptable one is found, it should - * be returned. - * - * decode_fh: - * @decode_fh is given a &struct super_block (@sb), a file handle fragment - * (@fh, @fh_len) and an acceptability testing function (@acceptable, - * @context). It should return a &struct dentry which refers to the same - * file that the file handle fragment refers to, and which passes the - * acceptability test. If it cannot, it should return a %NULL pointer if - * the file was found but no acceptable &dentries were available, or a - * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or - * %ENOMEM). - * - * encode_fh: - * @encode_fh should store in the file handle fragment @fh (using at most - * @max_len bytes) information that can be used by @decode_fh to recover the - * file refered to by the &struct dentry @de. If the @connectable flag is - * set, the encode_fh() should store sufficient information so that a good - * attempt can be made to find not only the file but also it's place in the - * filesystem. This typically means storing a reference to de->d_parent in - * the filehandle fragment. encode_fh() should return the number of bytes - * stored or a negative error code such as %-ENOSPC - * - * get_name: - * @get_name should find a name for the given @child in the given @parent - * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a a %NAME_MAX+1 sized - * buffer. get_name() should return %0 on success, a negative error code - * or error. @get_name will be called without @parent->i_mutex held. - * - * get_parent: - * @get_parent should find the parent directory for the given @child which - * is also a directory. In the event that it cannot be found, or storage - * space cannot be allocated, a %ERR_PTR should be returned. - * - * get_dentry: - * Given a &super_block (@sb) and a pointer to a file-system specific inode - * identifier, possibly an inode number, (@inump) get_dentry() should find - * the identified inode and return a dentry for that inode. Any suitable - * dentry can be returned including, if necessary, a new dentry created with - * d_alloc_root. The caller can then find any other extant dentrys by - * following the d_alias links. If a new dentry was created using - * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry - * should be d_rehash()ed. - * - * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code - * can be returned. The @inump will be whatever was passed to - * nfsd_find_fh_dentry() in either the @obj or @parent parameters. - * - * Locking rules: - * get_parent is called with child->d_inode->i_mutex down - * get_name is not (which is possibly inconsistent) - */ - -struct export_operations { - struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, int fh_len, int fh_type, - int (*acceptable)(void *context, struct dentry *de), - void *context); - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); - - /* the following are only called from the filesystem itself */ - int (*get_name)(struct dentry *parent, char *name, - struct dentry *child); - struct dentry * (*get_parent)(struct dentry *child); - struct dentry * (*get_dentry)(struct super_block *sb, void *inump); - - /* This is set by the exporting module to a standard helper */ - struct dentry * (*find_exported_dentry)( - struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); - - -}; - -extern struct dentry * -find_exported_dentry(struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); - struct file_system_type { const char *name; int fs_flags; -- cgit v1.2.3 From 5ca29607331fe37980dc3b488793ef8b1409b722 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Jul 2007 04:04:29 -0700 Subject: knfsd: exportfs: remove iget abuse When the exportfs interface was added the expectation was that filesystems provide an operation to convert from a file handle to an inode/dentry, but it kept a backwards compat option that still calls into iget. Calling into iget from non-filesystem code is very bad, because it gives too little information to filesystem, and simply crashes if the filesystem doesn't implement the ->read_inode routine. Fortunately there are only two filesystems left using this fallback: efs and jfs. This patch moves a copy of export_iget to each of those to implement the get_dentry method. While this is a temporary increase of lines of code in the kernel it allows for a much cleaner interface and important code restructuring in later patches. [akpm@linux-foundation.org: add jfs_get_inode_flags() declaration] Signed-off-by: Dave Kleikamp Signed-off-by: Christoph Hellwig Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/efs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efs_fs.h b/include/linux/efs_fs.h index dfed8009ebff..16cb25cbf7c5 100644 --- a/include/linux/efs_fs.h +++ b/include/linux/efs_fs.h @@ -45,6 +45,7 @@ extern efs_block_t efs_map_block(struct inode *, efs_block_t); extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *efs_get_dentry(struct super_block *sb, void *vobjp); extern struct dentry *efs_get_parent(struct dentry *); extern int efs_bmap(struct inode *, int); -- cgit v1.2.3 From d37065cd6d6bbe98fd4be14d6c9e64c0bfa124c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 17 Jul 2007 04:04:30 -0700 Subject: knfsd: exportfs: add procedural interface for NFSD Currently NFSD calls directly into filesystems through the export_operations structure. I plan to change this interface in various ways in later patches, and want to avoid the export of the default operations to NFSD, so this patch adds two simple exportfs_encode_fh/exportfs_decode_fh helpers for NFSD to call instead of poking into exportfs guts. Signed-off-by: Christoph Hellwig Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/exportfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fdc306fbba50..8872fe8392d6 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -5,6 +5,7 @@ struct dentry; struct super_block; +struct vfsmount; /** @@ -116,4 +117,10 @@ extern struct dentry *find_exported_dentry(struct super_block *sb, void *obj, void *parent, int (*acceptable)(void *context, struct dentry *de), void *context); +extern int exportfs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, + int connectable); +extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, __u32 *fh, + int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), + void *context); + #endif /* LINUX_EXPORTFS_H */ -- cgit v1.2.3 From 9a8db97e7756119689c93c431e8b8324080f5625 Mon Sep 17 00:00:00 2001 From: Marc Eshel Date: Tue, 17 Jul 2007 04:04:35 -0700 Subject: knfsd: lockd: nfsd4: use same grace period for lockd and nfsd4 Both lockd and (in the nfsv4 case) nfsd enforce a "grace period" after reboot, during which clients may reclaim locks from the previous server instance, but may not acquire new locks. Currently the lockd and nfsd enforce grace periods of different lengths. This may cause problems when we reboot a server with both v2/v3 and v4 clients. For example, if the lockd grace period is shorter (as is likely the case), then a v3 client might acquire a new lock that conflicts with a lock already held (but not yet reclaimed) by a v4 client. This patch calculates a lease time that lockd and nfsd can both use. Signed-off-by: Marc Eshel Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockd/bind.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 246de1d84a26..6f1637c61e10 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -27,6 +27,7 @@ struct nlmsvc_binding { struct nfs_fh *, struct file **); void (*fclose)(struct file *); + unsigned long (*get_grace_period)(void); }; extern struct nlmsvc_binding * nlmsvc_ops; @@ -38,4 +39,12 @@ extern int nlmclnt_proc(struct inode *, int, struct file_lock *); extern int lockd_up(int proto); extern void lockd_down(void); +unsigned long get_nfs_grace_period(void); + +#ifdef CONFIG_NFSD_V4 +unsigned long get_nfs4_grace_period(void); +#else +static inline unsigned long get_nfs4_grace_period(void) {return 0;} +#endif + #endif /* LINUX_LOCKD_BIND_H */ -- cgit v1.2.3 From 33a1060ae7dc671a0208b341bd454009625bb5a6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:35 -0700 Subject: knfsd: nfsd4: fix NFSv4 filehandle size units confusion NFS4_FHSIZE is measured in bytes, not 4-byte words, so much more space than necessary is being allocated for struct nfs4_cb_recall. I should have wondered why this structure was so much larger than it needed to be! Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index ab5c236bd9a7..732de9cad4a8 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -67,7 +67,7 @@ struct nfs4_cb_recall { int cbr_trunc; stateid_t cbr_stateid; u32 cbr_fhlen; - u32 cbr_fhval[NFS4_FHSIZE]; + char cbr_fhval[NFS4_FHSIZE]; struct nfs4_delegation *cbr_dp; }; -- cgit v1.2.3 From 1e5140279f31e47d58ed6036ee61ba7a65710e63 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:38 -0700 Subject: knfsd: nfsd: remove unused header interface.h It looks like Al Viro gutted this header file five years ago and it hasn't been touched since. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/interface.h | 13 ------------- include/linux/nfsd/nfsd.h | 1 - 2 files changed, 14 deletions(-) delete mode 100644 include/linux/nfsd/interface.h (limited to 'include/linux') diff --git a/include/linux/nfsd/interface.h b/include/linux/nfsd/interface.h deleted file mode 100644 index af0979704afb..000000000000 --- a/include/linux/nfsd/interface.h +++ /dev/null @@ -1,13 +0,0 @@ -/* - * include/linux/nfsd/interface.h - * - * defines interface between nfsd and other bits of - * the kernel. Particularly filesystems (eventually). - * - * Copyright (C) 2000 Neil Brown - */ - -#ifndef LINUX_NFSD_INTERFACE_H -#define LINUX_NFSD_INTERFACE_H - -#endif /* LINUX_NFSD_INTERFACE_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 72feac581aa3..0d8420497765 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -22,7 +22,6 @@ #include #include #include -#include /* * nfsd version */ -- cgit v1.2.3 From c2f1a551dea8b37c2e0cb886885c250fb703e9d8 Mon Sep 17 00:00:00 2001 From: Meelap Shah Date: Tue, 17 Jul 2007 04:04:39 -0700 Subject: knfsd: nfsd4: vary maximum delegation limit based on RAM size Our original NFSv4 delegation policy was to give out a read delegation on any open when it was possible to. Since the lifetime of a delegation isn't limited to that of an open, a client may quite reasonably hang on to a delegation as long as it has the inode cached. This becomes an obvious problem the first time a client's inode cache approaches the size of the server's total memory. Our first quick solution was to add a hard-coded limit. This patch makes a mild incremental improvement by varying that limit according to the server's total memory size, allowing at most 4 delegations per megabyte of RAM. My quick back-of-the-envelope calculation finds that in the worst case (where every delegation is for a different inode), a delegation could take about 1.5K, which would make the worst case usage about 6% of memory. The new limit works out to be about the same as the old on a 1-gig server. [akpm@linux-foundation.org: Don't needlessly bloat vmlinux] [akpm@linux-foundation.org: Make it right for highmem machines] Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 0d8420497765..ce5e345a9bce 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -148,6 +148,7 @@ extern int nfsd_max_blksize; * NFSv4 State */ #ifdef CONFIG_NFSD_V4 +extern unsigned int max_delegations; void nfs4_state_init(void); int nfs4_state_start(void); void nfs4_state_shutdown(void); -- cgit v1.2.3 From 47f9940c55c0bdc65188749cae4e841601f513bb Mon Sep 17 00:00:00 2001 From: Meelap Shah Date: Tue, 17 Jul 2007 04:04:40 -0700 Subject: knfsd: nfsd4: don't delegate files that have had conflicts One more incremental delegation policy improvement: don't give out a delegation on a file if conflicting access has previously required that a delegation be revoked on that file. (In practice we'll forget about the conflict when the struct nfs4_file is removed on close, so this is of limited use for now, though it should at least solve a temporary problem with self-conflicts on write opens from the same client.) Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/state.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 732de9cad4a8..db348f749376 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -224,6 +224,7 @@ struct nfs4_file { struct inode *fi_inode; u32 fi_id; /* used with stateowner->so_id * for stateid_hashtbl hash */ + bool fi_had_conflict; }; /* -- cgit v1.2.3 From c4170583f655dca5da32bd14173d6a93805fc48b Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 17 Jul 2007 04:04:42 -0700 Subject: knfsd: nfsd4: store pseudoflavor in request Add a new field to the svc_rqst structure to record the pseudoflavor that the request was made with. For now we record the pseudoflavor but don't use it for anything. Signed-off-by: Andy Adamson Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/gss_api.h | 1 + include/linux/sunrpc/svc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bbac101ac372..459c5fc11d51 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -58,6 +58,7 @@ u32 gss_unwrap( u32 gss_delete_sec_context( struct gss_ctx **ctx_id); +u32 gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 129d50f2225c..705a90aa345e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -212,6 +212,7 @@ struct svc_rqst { struct svc_pool * rq_pool; /* thread pool */ struct svc_procedure * rq_procinfo; /* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ + u32 rq_flavor; /* pseudoflavor */ struct svc_cred rq_cred; /* auth info */ struct sk_buff * rq_skbuff; /* fast recv inet buffer */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ -- cgit v1.2.3 From e677bfe4d451f8271986a229270c6eecd1f62b3f Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 17 Jul 2007 04:04:42 -0700 Subject: knfsd: nfsd4: parse secinfo information in exports downcall We add a list of pseudoflavors to each export downcall, which will be used both as a list of security flavors allowed on that export, and (in the order given) as the list of pseudoflavors to return on secinfo calls. This patch parses the new downcall information and adds it to the export structure, but doesn't use it for anything yet. Signed-off-by: J. Bruce Fields Signed-off-by: Andy Adamson Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 9f62d6182d32..736f0eafcedf 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -42,6 +42,8 @@ #define NFSEXP_NOACL 0x8000 /* reserved for possible ACL related use */ #define NFSEXP_ALLFLAGS 0xFE3F +/* The flags that may vary depending on security flavor: */ +#define NFSEXP_SECINFO_FLAGS 0 #ifdef __KERNEL__ @@ -64,6 +66,19 @@ struct nfsd4_fs_locations { int migrated; }; +/* + * We keep an array of pseudoflavors with the export, in order from most + * to least preferred. For the forseeable future, we don't expect more + * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3, + * spkm3i, and spkm3p (and using all 8 at once should be rare). + */ +#define MAX_SECINFO_LIST 8 + +struct exp_flavor_info { + u32 pseudoflavor; + u32 flags; +}; + struct svc_export { struct cache_head h; struct auth_domain * ex_client; @@ -76,6 +91,8 @@ struct svc_export { int ex_fsid; unsigned char * ex_uuid; /* 16 byte fsid */ struct nfsd4_fs_locations ex_fslocs; + int ex_nflavors; + struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; }; /* an "export key" (expkey) maps a filehandlefragement to an -- cgit v1.2.3 From df547efb03e3e8f9ea726e1d07fbbd6fd0706cd7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:43 -0700 Subject: knfsd: nfsd4: simplify exp_pseudoroot arguments We're passing three arguments to exp_pseudoroot, two of which are just fields of the svc_rqst. Soon we'll want to pass in a third field as well. So let's just give up and pass in the whole struct svc_rqst. Also sneak in some minor style cleanups while we're at it. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 736f0eafcedf..5ed4f277eeac 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -135,7 +135,7 @@ struct svc_export * exp_parent(struct auth_domain *clp, struct cache_req *reqp); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); -__be32 exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); +__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); __be32 nfserrno(int errno); extern struct cache_detail svc_export_cache; -- cgit v1.2.3 From 0989a7889695831e49e2c53c1884f52645516a90 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:44 -0700 Subject: knfsd: nfsd: provide export lookup wrappers which take a svc_rqst Split the callers of exp_get_by_name(), exp_find(), and exp_parent() into those that are processing requests and those that are doing other stuff (like looking up filehandles for mountd). No change in behavior, just a (fairly pointless, on its own) cleanup. (Note this has the effect of making nfsd_cross_mnt() pass rqstp->rq_client instead of exp->ex_client into exp_find_by_name(). However, the two should have the same value at this point.) Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 5ed4f277eeac..1ba53e524749 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -129,10 +129,16 @@ struct svc_export * exp_get_by_name(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, struct cache_req *reqp); +struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, + struct vfsmount *, + struct dentry *); struct svc_export * exp_parent(struct auth_domain *clp, struct vfsmount *mnt, struct dentry *dentry, struct cache_req *reqp); +struct svc_export * rqst_exp_parent(struct svc_rqst *, + struct vfsmount *mnt, + struct dentry *dentry); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); @@ -152,6 +158,7 @@ static inline void exp_get(struct svc_export *exp) extern struct svc_export * exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp); +struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 3ab4d8b1215d61736e2a9a26bea7cc2e6b029e3d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:46 -0700 Subject: knfsd: nfsd: set rq_client to ip-address-determined-domain We want it to be possible for users to restrict exports both by IP address and by pseudoflavor. The pseudoflavor information has previously been passed using special auth_domains stored in the rq_client field. After the preceding patch that stored the pseudoflavor in rq_pflavor, that's now superfluous; so now we use rq_client for the ip information, as auth_null and auth_unix do. However, we keep around the special auth_domain in the rq_gssclient field for backwards compatibility purposes, so we can still do upcalls using the old "gss/pseudoflavor" auth_domain if upcalls using the unix domain to give us an appropriate export. This allows us to continue supporting old mountd. In fact, for this first patch, we always use the "gss/pseudoflavor" auth_domain (and only it) if it is available; thus rq_client is ignored in the auth_gss case, and this patch on its own makes no change in behavior; that will be left to later patches. Note on idmap: I'm almost tempted to just replace the auth_domain in the idmap upcall by a dummy value--no version of idmapd has ever used it, and it's unlikely anyone really wants to perform idmapping differently depending on the where the client is (they may want to perform *credential* mapping differently, but that's a different matter--the idmapper just handles id's used in getattr and setattr). But I'm updating the idmapd code anyway, just out of general backwards-compatibility paranoia. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svcauth.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 705a90aa345e..8531a70da73d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -249,6 +249,7 @@ struct svc_rqst { */ /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ + struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ struct knfsd_fh * rq_reffh; /* Referrence filehandle, used to * determine what device number diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index de92619b0826..22e1ef8e200e 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -127,6 +127,7 @@ extern struct auth_domain *auth_unix_lookup(struct in_addr addr); extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(void); extern void svcauth_unix_info_release(void *); +extern int svcauth_unix_set_client(struct svc_rqst *rqstp); static inline unsigned long hash_str(char *name, int bits) { -- cgit v1.2.3 From 32c1eb0cd7ee00b5eb7b6f7059c635fbc1052966 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 17 Jul 2007 04:04:48 -0700 Subject: knfsd: nfsd4: return nfserr_wrongsec Make the first actual use of the secinfo information by using it to return nfserr_wrongsec when an export is found that doesn't allow the flavor used on this request. Signed-off-by: J. Bruce Fields Signed-off-by: Andy Adamson Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 1 + include/linux/nfsd/nfsd.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 1ba53e524749..424be41130ba 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -116,6 +116,7 @@ struct svc_expkey { #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* * Function declarations diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index ce5e345a9bce..62499c2f0918 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -236,6 +236,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_badname __constant_htonl(NFSERR_BADNAME) #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) #define nfserr_locked __constant_htonl(NFSERR_LOCKED) +#define nfserr_wrongsec __constant_htonl(NFSERR_WRONGSEC) #define nfserr_replay_me __constant_htonl(NFSERR_REPLAY_ME) /* error codes for internal use */ -- cgit v1.2.3 From 0ec757df9743025f14190d6034d8bd2bf37c2dd1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:48 -0700 Subject: knfsd: nfsd4: make readonly access depend on pseudoflavor Allow readonly access to vary depending on the pseudoflavor, using the flag passed with each pseudoflavor in the export downcall. The rest of the flags are ignored for now, though some day we might also allow id squashing to vary based on the flavor. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 13 ++++++++++++- include/linux/nfsd/nfsd.h | 3 ++- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 424be41130ba..a01f775cb944 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -112,10 +112,21 @@ struct svc_expkey { #define EX_SECURE(exp) (!((exp)->ex_flags & NFSEXP_INSECURE_PORT)) #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) -#define EX_RDONLY(exp) ((exp)->ex_flags & NFSEXP_READONLY) #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) +static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp) +{ + struct exp_flavor_info *f; + struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; + + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor == rqstp->rq_flavor) + return f->flags & NFSEXP_READONLY; + } + return exp->ex_flags & NFSEXP_READONLY; +} + __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 62499c2f0918..54ef1a18a56c 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -119,7 +119,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *); int nfsd_notify_change(struct inode *, struct iattr *); -__be32 nfsd_permission(struct svc_export *, struct dentry *, int); +__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, + struct dentry *, int); int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -- cgit v1.2.3 From dcb488a3b7ac3987e21148f44f641c9b2e734232 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 17 Jul 2007 04:04:51 -0700 Subject: knfsd: nfsd4: implement secinfo Implement the secinfo operation. (Thanks to Usha Ketineni wrote an earlier version of this support.) Cc: Usha Ketineni Signed-off-by: Andy Adamson Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 3 +++ include/linux/nfsd/xdr4.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 54ef1a18a56c..e452256d3f72 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -71,6 +71,9 @@ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, const char *, int, struct svc_fh *); +__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, + const char *, int, + struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); #ifdef CONFIG_NFSD_V4 diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 09799bcee0ac..1b653267133a 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -293,6 +293,12 @@ struct nfsd4_rename { struct nfsd4_change_info rn_tinfo; /* response */ }; +struct nfsd4_secinfo { + u32 si_namelen; /* request */ + char *si_name; /* request */ + struct svc_export *si_exp; /* response */ +}; + struct nfsd4_setattr { stateid_t sa_stateid; /* request */ u32 sa_bmval[2]; /* request */ @@ -365,6 +371,7 @@ struct nfsd4_op { struct nfsd4_remove remove; struct nfsd4_rename rename; clientid_t renew; + struct nfsd4_secinfo secinfo; struct nfsd4_setattr setattr; struct nfsd4_setclientid setclientid; struct nfsd4_setclientid_confirm setclientid_confirm; -- cgit v1.2.3 From 4796f45740bc6f2e3e6cc14e7ed481b38bd0bd39 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:51 -0700 Subject: knfsd: nfsd4: secinfo handling without secinfo= option We could return some sort of error in the case where someone asks for secinfo on an export without the secinfo= option set--that'd be no worse than what we've been doing. But it's not really correct. So, hack up an approximate secinfo response in that case--it may not be complete, but it'll tell the client at least one acceptable security flavor. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svcauth_gss.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 5a5db16ab660..417a1def56db 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -22,6 +22,7 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ -- cgit v1.2.3 From 1269bc69b6649282091bb7007372acf4ab8357fd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 Jul 2007 04:04:52 -0700 Subject: knfsd: nfsd: enforce per-flavor id squashing Allow root squashing to vary per-pseudoflavor, so that you can (for example) allow root access only when sufficiently strong security is in use. Signed-off-by: "J. Bruce Fields" Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index a01f775cb944..78feb7beff75 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -43,7 +43,8 @@ #define NFSEXP_ALLFLAGS 0xFE3F /* The flags that may vary depending on security flavor: */ -#define NFSEXP_SECINFO_FLAGS 0 +#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ + | NFSEXP_ALLSQUASH) #ifdef __KERNEL__ -- cgit v1.2.3 From 2e774c7caf84455d5e7d492d123bad6f417818b5 Mon Sep 17 00:00:00 2001 From: Mark Zhan Date: Tue, 17 Jul 2007 04:05:05 -0700 Subject: rtc: add support for the ST M48T59 RTC [akpm@linux-foundation.org: x86_64 build fix] [akpm@linux-foundation.org: The acpi guys changed the bin_attribute code] Signed-off-by: Mark Zhan Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rtc/m48t59.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/linux/rtc/m48t59.h (limited to 'include/linux') diff --git a/include/linux/rtc/m48t59.h b/include/linux/rtc/m48t59.h new file mode 100644 index 000000000000..e8c7c21ceb1f --- /dev/null +++ b/include/linux/rtc/m48t59.h @@ -0,0 +1,57 @@ +/* + * include/linux/rtc/m48t59.h + * + * Definitions for the platform data of m48t59 RTC chip driver. + * + * Copyright (c) 2007 Wind River Systems, Inc. + * + * Mark Zhan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_RTC_M48T59_H_ +#define _LINUX_RTC_M48T59_H_ + +/* + * M48T59 Register Offset + */ +#define M48T59_YEAR 0x1fff +#define M48T59_MONTH 0x1ffe +#define M48T59_MDAY 0x1ffd /* Day of Month */ +#define M48T59_WDAY 0x1ffc /* Day of Week */ +#define M48T59_WDAY_CB 0x20 /* Century Bit */ +#define M48T59_WDAY_CEB 0x10 /* Century Enable Bit */ +#define M48T59_HOUR 0x1ffb +#define M48T59_MIN 0x1ffa +#define M48T59_SEC 0x1ff9 +#define M48T59_CNTL 0x1ff8 +#define M48T59_CNTL_READ 0x40 +#define M48T59_CNTL_WRITE 0x80 +#define M48T59_WATCHDOG 0x1ff7 +#define M48T59_INTR 0x1ff6 +#define M48T59_INTR_AFE 0x80 /* Alarm Interrupt Enable */ +#define M48T59_INTR_ABE 0x20 +#define M48T59_ALARM_DATE 0x1ff5 +#define M48T59_ALARM_HOUR 0x1ff4 +#define M48T59_ALARM_MIN 0x1ff3 +#define M48T59_ALARM_SEC 0x1ff2 +#define M48T59_UNUSED 0x1ff1 +#define M48T59_FLAGS 0x1ff0 +#define M48T59_FLAGS_WDT 0x80 /* watchdog timer expired */ +#define M48T59_FLAGS_AF 0x40 /* alarm */ +#define M48T59_FLAGS_BF 0x10 /* low battery */ + +#define M48T59_NVRAM_SIZE 0x1ff0 + +struct m48t59_plat_data { + /* The method to access M48T59 registers, + * NOTE: The 'ofs' should be 0x00~0x1fff + */ + void (*write_byte)(struct device *dev, u32 ofs, u8 val); + unsigned char (*read_byte)(struct device *dev, u32 ofs); +}; + +#endif /* _LINUX_RTC_M48T59_H_ */ -- cgit v1.2.3 From 623e71b035cb5271028500720b3622ba76db42bb Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Tue, 17 Jul 2007 04:05:28 -0700 Subject: fbcon: allow fbcon to use the primary display driver Allow fbcon to select the primary display adapter using the fb_is_primary_device() arch-specific helper. If a a primary adapter is detected, fbcon will unbind the old adapter from the VT layer, then rebind using the new adapter. This requires that bind_/unbind_con_driver() be made public. Because this feature may produce unexpected behavior (from the user's POV), this must be explicitly enabled in Kconfig. [akpm@linux-foundation.org: export unbind_con_driver] Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vt_kern.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index d961635d0e61..0d034d89ee8a 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -75,6 +75,10 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); int vt_waitactive(int vt); void change_console(struct vc_data *new_vc); void reset_vc(struct vc_data *vc); +extern int bind_con_driver(const struct consw *csw, int first, int last, + int deflt); +extern int unbind_con_driver(const struct consw *csw, int first, int last, + int deflt); /* * vc_screen.c shares this temporary buffer with the console write code so that -- cgit v1.2.3 From cfafca8067c6defbaeb28cb898b7b3f8abdfe20d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 17 Jul 2007 04:05:33 -0700 Subject: fbdev: fbcon: console unregistration from unregister_framebuffer This allows for proper console unregistration via the VT layer, and updates the FB layer to use it. This makes debugging new console drivers much easier, since you can properly clean them up before unloading. [adaplas] unregister_framebuffer() is typically called as part of the driver's module_exit(). Doing so otherwise will freeze the machine as the VT layer is holding reference counts on fbcon, and fbcon on the driver. With this change, it allows unregister_framebuffer() to be called safely anywhere as needed. Additions from the original: If multiple drivers are used by fbcon, and if one of them unregisters, a driver will take over the consoles vacated by the outgoing one (via set_con2fb_map). Once only the outgoing driver remains, then fbcon will unbind from the VT layer (if CONFIG_HW_CONSOLE_UNBINDING is set to y). It is important that these drivers implement fb_open() and fb_release() just to ensure that no other process is using the driver. Likewise, these drivers _must_ check the return value of unregister_framebuffer(). [akpm@linux-foundation.org: make fbcon_unbind() stub inline] Signed-off-by: Jesse Barnes Signed-off-by: Antonino Daplas Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 66226824ab68..8628423c6dd3 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -529,6 +529,8 @@ struct fb_cursor_user { #define FB_EVENT_CONBLANK 0x0C /* Get drawing requirements */ #define FB_EVENT_GET_REQ 0x0D +/* Unbind from the console if possible */ +#define FB_EVENT_FB_UNBIND 0x0E struct fb_event { struct fb_info *info; -- cgit v1.2.3 From b7269dd2b97b9aedb64e15fdec5575345d091925 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 17 Jul 2007 04:05:34 -0700 Subject: vt: add comment for unbind_con_driver() - add comment for unbind_con_driver(). - bind_con_driver() is made private again Signed-off-by: Jesse Barnes Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vt_kern.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 0d034d89ee8a..699b7e9864fa 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -75,8 +75,6 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc); int vt_waitactive(int vt); void change_console(struct vc_data *new_vc); void reset_vc(struct vc_data *vc); -extern int bind_con_driver(const struct consw *csw, int first, int last, - int deflt); extern int unbind_con_driver(const struct consw *csw, int first, int last, int deflt); -- cgit v1.2.3 From 9900abfb5e8192f0eafcd9b9dd5d54011e46c76c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Jul 2007 04:05:50 -0700 Subject: fbdev: Add fb_append_extra_logo() Add fb_append_extra_logo(), to append extra lines of logos below the standard Linux logo. Signed-off-by: Geert Uytterhoeven Signed-off-by: Geoff Levand Acked-By: James Simmons Cc: "Antonino A. Daplas" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/linux_logo.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index 9c01bde5bf1b..08a92969c76e 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -33,5 +33,13 @@ struct linux_logo { }; extern const struct linux_logo *fb_find_logo(int depth); +#ifdef CONFIG_FB_LOGO_EXTRA +extern void fb_append_extra_logo(const struct linux_logo *logo, + unsigned int n); +#else +static inline void fb_append_extra_logo(const struct linux_logo *logo, + unsigned int n) +{} +#endif #endif /* _LINUX_LINUX_LOGO_H */ -- cgit v1.2.3 From fe0e3a9df6372d357d3fdc4b6265a5417f1e84e8 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 17 Jul 2007 04:05:55 -0700 Subject: OMAP: add TI OMAP1610 accelerator entry. Signed-off-by: Trilok Soni Cc: Tony Lindgren Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8628423c6dd3..cec54106aa87 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -119,6 +119,7 @@ struct dentry; #define FB_ACCEL_NV_40 46 /* nVidia Arch 40 */ #define FB_ACCEL_XGI_VOLARI_V 47 /* XGI Volari V3XT, V5, V8 */ #define FB_ACCEL_XGI_VOLARI_Z 48 /* XGI Volari Z7 */ +#define FB_ACCEL_OMAP1610 49 /* TI OMAP16xx */ #define FB_ACCEL_NEOMAGIC_NM2070 90 /* NeoMagic NM2070 */ #define FB_ACCEL_NEOMAGIC_NM2090 91 /* NeoMagic NM2090 */ #define FB_ACCEL_NEOMAGIC_NM2093 92 /* NeoMagic NM2093 */ -- cgit v1.2.3 From 713f6ab18b0e7d39f14401362bfe8015b1aedde1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Jul 2007 04:06:12 -0700 Subject: md: improve the is_mddev_idle test fix Don't use 'unsigned' variable to track sync vs non-sync IO, as the only thing we want to do with them is a signed comparison, and fix up the comment which had become quite wrong. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index de72c49747c8..28ac632b42dd 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -51,7 +51,7 @@ struct mdk_rdev_s sector_t size; /* Device size (in blocks) */ mddev_t *mddev; /* RAID array if running */ - unsigned long last_events; /* IO event timestamp */ + long last_events; /* IO event timestamp */ struct block_device *bdev; /* block device handle */ -- cgit v1.2.3 From 4ad1366376bfef32ec0ffa12d1faa483d6f330bd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 17 Jul 2007 04:06:13 -0700 Subject: md: change bitmap_unplug and others to void functions bitmap_unplug only ever returns 0, so it may as well be void. Two callers try to print a message if it returns non-zero, but that message is already printed by bitmap_file_kick. write_page returns an error which is not consistently checked. It always causes BITMAP_WRITE_ERROR to be set on an error, and that can more conveniently be checked. When the return of write_page is checked, an error causes bitmap_file_kick to be called - so move that call into write_page - and protect against recursive calls into bitmap_file_kick. bitmap_update_sb returns an error that is never checked. So make these 'void' and be consistent about checking the bit. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index dd5a05d03d4f..75e17a05540e 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -262,7 +262,7 @@ int bitmap_active(struct bitmap *bitmap); char *file_path(struct file *file, char *buf, int count); void bitmap_print_sb(struct bitmap *bitmap); -int bitmap_update_sb(struct bitmap *bitmap); +void bitmap_update_sb(struct bitmap *bitmap); int bitmap_setallbits(struct bitmap *bitmap); void bitmap_write_all(struct bitmap *bitmap); @@ -278,8 +278,8 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int d void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted); void bitmap_close_sync(struct bitmap *bitmap); -int bitmap_unplug(struct bitmap *bitmap); -int bitmap_daemon_work(struct bitmap *bitmap); +void bitmap_unplug(struct bitmap *bitmap); +void bitmap_daemon_work(struct bitmap *bitmap); #endif #endif -- cgit v1.2.3 From 3bd858ab1c451725c07a805dcb315215dc85b86e Mon Sep 17 00:00:00 2001 From: Satyam Sharma Date: Tue, 17 Jul 2007 15:00:08 +0530 Subject: Introduce is_owner_or_cap() to wrap CAP_FOWNER use with fsuid check Introduce is_owner_or_cap() macro in fs.h, and convert over relevant users to it. This is done because we want to avoid bugs in the future where we check for only effective fsuid of the current task against a file's owning uid, without simultaneously checking for CAP_FOWNER as well, thus violating its semantics. [ XFS uses special macros and structures, and in general looked ... untouchable, so we leave it alone -- but it has been looked over. ] The (current->fsuid != inode->i_uid) check in generic_permission() and exec_permission_lite() is left alone, because those operations are covered by CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH. Similarly operations falling under the purview of CAP_CHOWN and CAP_LEASE are also left alone. Signed-off-by: Satyam Sharma Cc: Al Viro Acked-by: Serge E. Hallyn Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 58ce336d4a6b..98205f680476 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -284,6 +284,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -990,6 +991,9 @@ enum { #define put_fs_excl() atomic_dec(¤t->fs_excl) #define has_fs_excl() atomic_read(¤t->fs_excl) +#define is_owner_or_cap(inode) \ + ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) + /* not quite ready to be deprecated, but... */ extern void lock_super(struct super_block *); extern void unlock_super(struct super_block *); -- cgit v1.2.3 From 8dfd588c3180b7403c402b4545164ee4543f8f86 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Jul 2007 22:29:46 +0100 Subject: smp_call_function_single() should be a macro on UP ... or we end up with header include order problems from hell. E.g. on m68k this is 100% fatal - local_irq_enable() there wants preempt_count(), which wants task_struct fields, which we won't have when we are in smp.h pulled from sched.h. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/smp.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 8039daced688..259a13c3bd98 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,7 +7,6 @@ */ #include -#include extern void cpu_idle(void); @@ -100,15 +99,14 @@ static inline int up_smp_call_function(void) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait) -{ - WARN_ON(cpuid != 0); - local_irq_disable(); - func(info); - local_irq_enable(); - return 0; -} +#define smp_call_function_single(cpuid, func, info, retry, wait) \ +({ \ + WARN_ON(cpuid != 0); \ + local_irq_disable(); \ + (func)(info); \ + local_irq_enable(); \ + 0; \ +}) #endif /* !SMP */ -- cgit v1.2.3 From cb32da0416b823b7f4b65e7e85d6cba16ca4d1e1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 18 Jul 2007 09:18:36 +0900 Subject: slob: Kill off duplicate kzalloc() definition. With the slab zeroing allocations cleanups Christoph stubbed in a generic kzalloc(), which was missed on SLOB. Follow the SLAB/SLUB changes and kill off the __kzalloc() wrapper that SLOB was using. Reported-by: Jan Engelhardt Signed-off-by: Paul Mundt Signed-off-by: Linus Torvalds --- include/linux/slob_def.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index a2daf2d418a9..59a3fa476ab9 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -33,14 +33,4 @@ static inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } -/** - * kzalloc - allocate memory. The memory is set to zero. - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kcalloc). - */ -static inline void *kzalloc(size_t size, gfp_t flags) -{ - return __kzalloc(size, flags); -} - #endif /* __LINUX_SLOB_DEF_H */ -- cgit v1.2.3 From 97ac73506c0ba93f30239bb57b4cfc5d73e68a62 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:44 -0400 Subject: sys_fallocate() implementation on i386, x86_64 and powerpc fallocate() is a new system call being proposed here which will allow applications to preallocate space to any file(s) in a file system. Each file system implementation that wants to use this feature will need to support an inode operation called ->fallocate(). Applications can use this feature to avoid fragmentation to certain level and thus get faster access speed. With preallocation, applications also get a guarantee of space for particular file(s) - even if later the the system becomes full. Currently, glibc provides an interface called posix_fallocate() which can be used for similar cause. Though this has the advantage of working on all file systems, but it is quite slow (since it writes zeroes to each block that has to be preallocated). Without a doubt, file systems can do this more efficiently within the kernel, by implementing the proposed fallocate() system call. It is expected that posix_fallocate() will be modified to call this new system call first and incase the kernel/filesystem does not implement it, it should fall back to the current implementation of writing zeroes to the new blocks. ToDos: 1. Implementation on other architectures (other than i386, x86_64, and ppc). Patches for s390(x) and ia64 are already available from previous posts, but it was decided that they should be added later once fallocate is in the mainline. Hence not including those patches in this take. 2. Changes to glibc, a) to support fallocate() system call b) to make posix_fallocate() and posix_fallocate64() call fallocate() Signed-off-by: Amit Arora --- include/linux/falloc.h | 6 ++++++ include/linux/fs.h | 2 ++ include/linux/syscalls.h | 1 + 3 files changed, 9 insertions(+) create mode 100644 include/linux/falloc.h (limited to 'include/linux') diff --git a/include/linux/falloc.h b/include/linux/falloc.h new file mode 100644 index 000000000000..8e912ab6a072 --- /dev/null +++ b/include/linux/falloc.h @@ -0,0 +1,6 @@ +#ifndef _FALLOC_H_ +#define _FALLOC_H_ + +#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ + +#endif /* _FALLOC_H_ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 98205f680476..0b806c5e32eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1147,6 +1147,8 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + long (*fallocate)(struct inode *inode, int mode, loff_t offset, + loff_t len); }; struct seq_file; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83d0ec11235e..7a8b1e3322e0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas asmlinkage long sys_timerfd(int ufd, int clockid, int flags, const struct itimerspec __user *utmr); asmlinkage long sys_eventfd(unsigned int count); +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3 From a2df2a63407803a833f82e1fa6693826c8c9d584 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:41 -0400 Subject: fallocate support in ext4 This patch implements ->fallocate() inode operation in ext4. With this patch users of ext4 file systems will be able to use fallocate() system call for persistent preallocation. Current implementation only supports preallocation for regular files (directories not supported as of date) with extent maps. This patch does not support block-mapped files currently. Only FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are being supported as of now. Signed-off-by: Amit Arora --- include/linux/ext4_fs.h | 8 ++++++++ include/linux/ext4_fs_extents.h | 15 +++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index de1f9f78625a..87c2d7a05b01 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -102,6 +102,7 @@ EXT4_GOOD_OLD_FIRST_INO : \ (s)->s_first_ino) #endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) /* * Macro-instructions used to manage fragments @@ -225,6 +226,11 @@ struct ext4_new_group_data { __u32 free_blocks_count; }; +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 /* * ioctl commands @@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, extern void ext4_ext_truncate(struct inode *, struct page *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); static inline int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index acfe59740b03..e3d5afc6f23e 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -188,6 +188,21 @@ ext4_ext_invalidate_cache(struct inode *inode) EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; } +static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +{ + ext->ee_len |= cpu_to_le16(0x8000); +} + +static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +{ + return (int)(le16_to_cpu((ext)->ee_len) & 0x8000); +} + +static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) +{ + return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF); +} + extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); -- cgit v1.2.3 From 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:38 -0400 Subject: write support for preallocated blocks This patch adds write support to the uninitialized extents that get created when a preallocation is done using fallocate(). It takes care of splitting the extents into multiple (upto three) extents and merging the new split extents with neighbouring ones, if possible. Signed-off-by: Amit Arora --- include/linux/ext4_fs_extents.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index e3d5afc6f23e..edf49ec89eac 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); -- cgit v1.2.3 From e4f48861993294c27849076741eb0c090482560b Mon Sep 17 00:00:00 2001 From: Semih Hazar Date: Wed, 18 Jul 2007 00:35:56 -0400 Subject: Input: ads7846 - introduce sample settling delay The ads7846 driver has support for filtering, but when the chip gets deselected between samples this causes noise. This patch adds support for an optional settling delay time, so that two consecutive samples will be taken with the specified delay time apart. This ensures that the chip won't be deselected, so the noise won't appear. Filtering can still be done, but will have less work to do since each time a new sample is taken the same delay applies. Signed-off-by: Semih Hazar Signed-off-by: David Brownell Signed-off-by: Dmitry Torokhov --- include/linux/spi/ads7846.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 3387e44dfd13..a44fa7a02bd9 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -16,6 +16,14 @@ struct ads7846_platform_data { u16 vref_delay_usecs; /* 0 for external vref; etc */ int keep_vref_on:1; /* set to keep vref on for differential * measurements as well */ + + /* Settling time of the analog signals; a function of Vcc and the + * capacitance on the X/Y drivers. If set to non-zero, two samples + * are taken with settle_delay us apart, and the second one is used. + * ~150 uSec with 0.01uF caps. + */ + u16 settle_delay_usecs; + u16 x_plate_ohms; u16 y_plate_ohms; -- cgit v1.2.3 From 1d25891f3241103d14ea78236504474a138b8ada Mon Sep 17 00:00:00 2001 From: Semih Hazar Date: Wed, 18 Jul 2007 00:36:04 -0400 Subject: Input: ads7846 - re-check pendown status before reporting events Pendown status from the PENIRQ pin is currently read only at the beginning of a sample set. If the pen is lifted just after sampling has began then sampled values become wrong. This patch adds an optional platform penirq_recheck_delay attribute. If non-zero, samples are only reported to the input subsystem if PENIRQ is still active that long after the samples taken. Signed-off-by: Semih Hazar Signed-off-by: David Brownell Signed-off-by: Dmitry Torokhov --- include/linux/spi/ads7846.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index a44fa7a02bd9..334d31411629 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -24,6 +24,12 @@ struct ads7846_platform_data { */ u16 settle_delay_usecs; + /* If set to non-zero, after samples are taken this delay is applied + * and penirq is rechecked, to help avoid false events. This value + * is affected by the material used to build the touch layer. + */ + u16 penirq_recheck_delay_usecs; + u16 x_plate_ohms; u16 y_plate_ohms; -- cgit v1.2.3 From 85f202d5df877f8adcda342b74ab11fbdfea753d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 18 Jul 2007 00:37:01 -0400 Subject: Input: add driver for Fujitsu serial touchscreens These serial touchscreens are found on some Fujitsu lifebook P-series laptops, and the B6210. Using this requires a new version of inputattach and doing: inputattach -fjt /dev/ttyS0 Big thanks to Stephen Hemminger for testing it and making it work on his B6210 laptop. Signed-off-by: Stephen Hemminger Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index d9377ce9ffd1..9f3825014674 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -210,5 +210,6 @@ static inline void serio_unpin_driver(struct serio *serio) #define SERIO_TOUCHRIGHT 0x32 #define SERIO_TOUCHWIN 0x33 #define SERIO_TAOSEVM 0x34 +#define SERIO_FUJITSU 0x35 #endif -- cgit v1.2.3 From 5517853712f1f6daac8a7b2590f9b821e767aa13 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 18 Jul 2007 00:38:45 -0400 Subject: Input: document intended meaning of KEY_SWITCHVIDEOMODE Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 18c98b543030..e02c6a66b2ba 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -344,7 +344,8 @@ struct input_absinfo { #define KEY_BRIGHTNESSUP 225 #define KEY_MEDIA 226 -#define KEY_SWITCHVIDEOMODE 227 +#define KEY_SWITCHVIDEOMODE 227 /* Cycle between available video + outputs (Monitor/LCD/TV-out/etc) */ #define KEY_KBDILLUMTOGGLE 228 #define KEY_KBDILLUMDOWN 229 #define KEY_KBDILLUMUP 230 -- cgit v1.2.3 From 456ad75c89cdb72e11dcdb6b0794802a6f50c8a3 Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Wed, 18 Jul 2007 02:10:54 -0700 Subject: [NET]: move dev_mc_discard from dev_mcast.c to dev.c Because this function is only called by unregister_netdevice, this moving could make this non-global function static, and also remove its declaration in netdevice.h; Any further, function __dev_addr_discard is also just called by dev_mc_discard and dev_unicast_discard, keeping this two functions both in one c file could make __dev_addr_discard also static and remove its declaration in netdevice.h; Futhermore, the sequential call to dev_unicast_discard and then dev_mc_discard in unregister_netdevice have a similar mechanism that: (netif_tx_lock_bh / __dev_addr_discard / netif_tx_unlock_bh), they should merged into one to eliminate duplicates in acquiring and releasing the dev->_xmit_lock, this would be done in my following patch. Signed-off-by: Denis Cheng Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index da7a13c97eb8..9820ca1e45e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1098,10 +1098,8 @@ extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern int dev_mc_sync(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); -extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); -extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); -- cgit v1.2.3 From ebd61cc042b16e6cf2486aafbfff9e4be8c213ee Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 18 Jul 2007 02:21:50 -0700 Subject: [NETFILTER]: ipt_iprange.h must #include ipt_iprange.h must #include since it uses __be32. This patch fixes kernel Bugzilla #7604. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_iprange.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h index 34ab0fb736e2..a92fefc3c7ec 100644 --- a/include/linux/netfilter_ipv4/ipt_iprange.h +++ b/include/linux/netfilter_ipv4/ipt_iprange.h @@ -1,6 +1,8 @@ #ifndef _IPT_IPRANGE_H #define _IPT_IPRANGE_H +#include + #define IPRANGE_SRC 0x01 /* Match source IP address */ #define IPRANGE_DST 0x02 /* Match destination IP address */ #define IPRANGE_SRC_INV 0x10 /* Negate the condition */ -- cgit v1.2.3 From 749269facaf87f6e516c3af12763e03181b9c139 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Wed, 18 Jul 2007 09:02:56 -0400 Subject: Change on-disk format to support 2^15 uninitialized extents This change was suggested by Andreas Dilger. This patch changes the EXT_MAX_LEN value and extent code which marks/checks uninitialized extents. With this change it will be possible to have initialized extents with 2^15 blocks (earlier the max blocks we could have was 2^15 - 1). This way we can have better extent-to-block alignment. Now, maximum number of blocks we can have in an initialized extent is 2^15 and in an uninitialized extent is 2^15 - 1. Signed-off-by: Amit Arora --- include/linux/ext4_fs_extents.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index edf49ec89eac..81406f3655d4 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, #define EXT_MAX_BLOCK 0xffffffff -#define EXT_MAX_LEN ((1UL << 15) - 1) +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ @@ -190,17 +208,22 @@ ext4_ext_invalidate_cache(struct inode *inode) static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) { - ext->ee_len |= cpu_to_le16(0x8000); + /* We can not have an uninitialized extent of zero length! */ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x8000); + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF); + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } extern int ext4_extent_tree_init(handle_t *, struct inode *); -- cgit v1.2.3 From ff9ddf7e847c4dc533f119efb6c77a6e57ab6397 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Jul 2007 09:24:20 -0400 Subject: ext4: copy i_flags to inode flags on write Propagate flags such as S_APPEND, S_IMMUTABLE, etc. from i_flags into ext4-specific i_flags. Quota code changes these flags on quota files (to make it harder for sysadmin to screw himself) and these changes were not correctly propagated into the filesystem. (This is a forward port patch from ext3) Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 87c2d7a05b01..33b2b1a2d790 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -868,6 +868,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern void ext4_truncate (struct inode *); extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_block_truncate_page(handle_t *handle, struct page *page, -- cgit v1.2.3 From e23291b9120c11aafb2ee76fb71a062eb3c1056c Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Wed, 18 Jul 2007 08:57:06 -0400 Subject: jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG When the JBD code was forked to create the new JBD2 code base, the references to CONFIG_JBD_DEBUG where never changed to CONFIG_JBD2_DEBUG. This patch fixes that. Signed-off-by: Jose R. Santos Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 4 ++-- include/linux/ext4_fs_sb.h | 2 +- include/linux/jbd2.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 33b2b1a2d790..45ec7258b2b1 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -243,7 +243,7 @@ struct ext4_new_group_data { #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) @@ -259,7 +259,7 @@ struct ext4_new_group_data { #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) #endif #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 2347557a327a..0f7dc15924bf 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -73,7 +73,7 @@ struct ext4_sb_info { struct list_head s_orphan; unsigned long s_commit_interval; struct block_device *journal_bdev; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0e0fedd2039a..a37aca31de46 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -50,11 +50,11 @@ */ #define JBD_DEFAULT_MAX_COMMIT_AGE 5 -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG /* * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal * consistency checks. By default we don't do this unless - * CONFIG_JBD_DEBUG is on. + * CONFIG_JBD2_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING extern int jbd2_journal_enable_debug; -- cgit v1.2.3 From 0f49d5d019afa4e94253bfc92f0daca3badb990b Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Wed, 18 Jul 2007 08:50:18 -0400 Subject: jbd2: Move jbd2-debug file to debugfs The jbd2-debug file used to be located in /proc/sys/fs/jbd2-debug, but it incorrectly used create_proc_entry() instead of the sysctl routines, and no proc entry was ever created. Instead of fixing this we might as well move the jbd2-debug file to debugfs which would be the preferred location for this kind of tunable. The new location is now /sys/kernel/debug/jbd2/jbd2-debug. Signed-off-by: Jose R. Santos Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a37aca31de46..260d6d76c5f3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -57,7 +57,7 @@ * CONFIG_JBD2_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING -extern int jbd2_journal_enable_debug; +extern u8 jbd2_journal_enable_debug; #define jbd_debug(n, f, a...) \ do { \ -- cgit v1.2.3 From ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80 Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:15:20 -0400 Subject: ext4: Add nanosecond timestamps This patch adds nanosecond timestamps for ext4. This involves adding *time_extra fields to the ext4_inode to extend the timestamps to 64-bits. Creation time is also added by this patch. These extended fields will fit into an inode if the filesystem was formatted with large inodes (-I 256 or larger) and there are currently no EAs consuming all of the available space. For new inodes we always reserve enough space for the kernel's known extended fields, but for inodes created with an old kernel this might not have been the case. So this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature flag(ro-compat so that older kernels can't create inodes with a smaller extra_isize). which indicates if the fields fitting inside s_min_extra_isize are available or not. If the expansion of inodes if unsuccessful then this feature will be disabled. This feature is only enabled if requested by the sysadmin. None of the extended inode fields is critical for correct filesystem operation. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Eric Sandeen Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 86 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext4_fs_i.h | 5 +++ include/linux/ext4_fs_sb.h | 1 + 3 files changed, 91 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 45ec7258b2b1..df5e38faa15f 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -288,7 +288,7 @@ struct ext4_inode { __le16 i_uid; /* Low 16 bits of Owner Uid */ __le32 i_size; /* Size in bytes */ __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ + __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ @@ -337,10 +337,85 @@ struct ext4_inode { } osd2; /* OS dependent 2 */ __le16 i_extra_isize; __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ }; #define i_size_high i_dir_acl +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_frag osd2.linux2.l_i_frag @@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) return container_of(inode, struct ext4_inode_info, vfs_inode); } +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 9de494406995..1a511e9905aa 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -153,6 +153,11 @@ struct ext4_inode_info { unsigned long i_ext_generation; struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 0f7dc15924bf..1b2ffee12be9 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -81,6 +81,7 @@ struct ext4_sb_info { char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ #ifdef EXTENTS_STATS /* ext4 extents stats */ -- cgit v1.2.3 From 6dd4ee7cab7e3a17c571aebd444f4344c8c4946e Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:19:57 -0400 Subject: ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields We need to make sure that existing ext3 filesystems can also avail the new fields that have been added to the ext4 inode. We use s_want_extra_isize and s_min_extra_isize to decide by how much we should expand the inode. If EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature is set then we expand the inode by max(s_want_extra_isize, s_min_extra_isize , sizeof(ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE) bytes. Actually it is still an open question about whether users should be able to set s_*_extra_isize smaller than the known fields or not. This patch also adds the functionality to expand inodes to include the newly added fields. We start by trying to expand by s_want_extra_isize bytes and if its fails we try to expand by s_min_extra_isize bytes. This is done by changing the i_extra_isize if enough space is available in the inode and no EAs are present. If EAs are present and there is enough space in the inode then the EAs in the inode are shifted to make space. If enough space is not available in the inode due to the EAs then 1 or more EAs are shifted to the external EA block. In the worst case when even the external EA block does not have enough space we inform the user that some EA would need to be deleted or s_min_extra_isize would have to be reduced. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index df5e38faa15f..52dcc24dd986 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -202,6 +202,7 @@ struct ext4_group_desc #define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { -- cgit v1.2.3 From f8628a14a27eb4512a1ede43de1d9db4d9f92bc3 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 18 Jul 2007 08:38:01 -0400 Subject: ext4: Remove 65000 subdirectory limit This patch adds support to ext4 for allowing more than 65000 subdirectories. Currently the maximum number of subdirectories is capped at 32000. If we exceed 65000 subdirectories in an htree directory it sets the inode link count to 1 and no longer counts subdirectories. The directory link count is not actually used when determining if a directory is empty, as that only counts subdirectories and not regular files that might be in there. A EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added and it is set if the subdir count for any directory crosses 65000. A later fsck will clear EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any directory with >65000 subdirs. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: "Theodore Ts'o" --- include/linux/ext4_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 52dcc24dd986..cdee7aaa57aa 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -71,7 +71,7 @@ /* * Maximal count of links to a file */ -#define EXT4_LINK_MAX 32000 +#define EXT4_LINK_MAX 65000 /* * Macro-instructions used to manage several block sizes @@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) -- cgit v1.2.3 From b187f180cc942e50007aa039f8e3a620ee5f3171 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 18 Jul 2007 00:49:10 -0700 Subject: serial: add early_serial_setup() back to header file early_serial_setup was removed from serial.h, but forgot to put in serial_8250.h Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/serial_8250.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 706ee9a4c80c..8518fa2a6f89 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -60,6 +60,8 @@ void serial8250_unregister_port(int line); void serial8250_suspend_port(int line); void serial8250_resume_port(int line); +extern int early_serial_setup(struct uart_port *port); + extern int serial8250_find_port(struct uart_port *p); extern int serial8250_find_port_for_earlycon(void); extern int setup_early_serial8250_console(char *cmdline); -- cgit v1.2.3 From 8b4a40809e5330c9da5d20107d693d92d73b31dc Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 18 Jul 2007 00:49:11 -0700 Subject: zs: move to the serial subsystem This is a reimplementation of the zs driver for the serial subsystem. Any resemblance to the old driver is purely coincidential. ;-) I do hope I got the handling of modem lines right -- better do not tackle me about the issue unless you feel too good... Any users of the old driver: please note the numbers of the serial lines have now been swapped, i.e. ttyS0 <-> ttyS1 and ttyS2 <-> ttyS3. It has to do with the modem lines mentioned above; basically the port A in a given chip has to be initialised before the port B if you want to use the latter as the serial console (which is usually the case), as operations on modem lines of the serial line associated with the port B access both ports (see the comment at the top of the driver for the details of wiring used). Please update your scripts. This is also the reason each SCC now requests an IRQ once only (as seen in "/proc/interrupts") -- the handler takes care of both ports at once as the line associated with the port B has to take status update interrupts from both ports (and yet the line of the port A takes its own for itself too). The old driver never got it right... Signed-off-by: Maciej W. Rozycki Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9c721cd2c9d6..773d8d8828ad 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -62,8 +62,9 @@ /* NEC v850. */ #define PORT_V850E_UART 40 -/* DZ */ -#define PORT_DZ 47 +/* DEC */ +#define PORT_DZ 46 +#define PORT_ZS 47 /* Parisc type numbers. */ #define PORT_MUX 48 -- cgit v1.2.3 From 1e66df3ee301209f4a38df097d7cc5cb9b367a3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: add kstrndup Add a kstrndup function, modelled on strndup. Like strndup this returns a string copied into its own allocated memory, but it copies no more than the specified number of bytes from the source. Remove private strndup() from irda code. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: YOSHIFUJI Hideaki Cc: Akinobu Mita Cc: Arnaldo Carvalho de Melo Cc: Al Viro Cc: Panagiotis Issaris Cc: Rene Scharfe --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 7f2eb6a477f9..ee5e9ccc4aae 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -105,6 +105,7 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif extern char *kstrdup(const char *s, gfp_t gfp); +extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); #ifdef __cplusplus -- cgit v1.2.3 From d84d1cc7647c7e4f77d517e2d87b4a106a0420d9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: add argv_split() argv_split() is a helper function which takes a string, splits it at whitespace, and returns a NULL-terminated argv vector. This is deliberately simple - it does no quote processing of any kind. [ Seems to me that this is something which is already being done in the kernel, but I couldn't find any other implementations, either to steal or replace. Keep an eye out. ] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap --- include/linux/string.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index ee5e9ccc4aae..836062b7582a 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -108,6 +108,9 @@ extern char *kstrdup(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char **argv_split(gfp_t gfp, const char *str, int *argcp); +extern void argv_free(char **argv); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 0ab4dc92278a0f3816e486d6350c6652a72e06c8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: usermodehelper: split setup from execution Rather than having hundreds of variations of call_usermodehelper for various pieces of usermode state which could be set up, split the info allocation and initialization from the actual process execution. This means the general pattern becomes: info = call_usermodehelper_setup(path, argv, envp); /* basic state */ call_usermodehelper_(info, stuff...); /* extra state */ call_usermodehelper_exec(info, wait); /* run process and free info */ This patch introduces wrappers for all the existing calling styles for call_usermodehelper_*, but folds their implementations into one. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Rusty Russell Cc: David Howells Cc: Bj?rn Steinbrink Cc: Randy Dunlap --- include/linux/kmod.h | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 10f505c8431d..c4cbe59d9c67 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -36,13 +36,51 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; } #define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x))) struct key; -extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[], - struct key *session_keyring, int wait); +struct file; +struct subprocess_info; + +/* Allocate a subprocess_info structure */ +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp); + +/* Set various pieces of state into the subprocess_info structure */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring); +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp); +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)); + +/* Actually execute the sub-process */ +int call_usermodehelper_exec(struct subprocess_info *info, int wait); + +/* Free the subprocess_info. This is only needed if you're not going + to call call_usermodehelper_exec */ +void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper(char *path, char **argv, char **envp, int wait) { - return call_usermodehelper_keys(path, argv, envp, NULL, wait); + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + return call_usermodehelper_exec(info, wait); +} + +static inline int +call_usermodehelper_keys(char *path, char **argv, char **envp, + struct key *session_keyring, int wait) +{ + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + + call_usermodehelper_setkeys(info, session_keyring); + return call_usermodehelper_exec(info, wait); } extern void usermodehelper_init(void); -- cgit v1.2.3 From 10a0a8d4e3f6bf2d077f94344441909abe670f5a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: Add common orderly_poweroff() Various pieces of code around the kernel want to be able to trigger an orderly poweroff. This pulls them together into a single implementation. By default the poweroff command is /sbin/poweroff, but it can be set via sysctl: kernel/poweroff_cmd. This is split at whitespace, so it can include command-line arguments. This patch replaces four other instances of invoking either "poweroff" or "shutdown -h now": two sbus drivers, and acpi thermal management. sparc64 has its own "powerd"; still need to determine whether it should be replaced by orderly_poweroff(). Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: Andi Kleen Cc: Al Viro Cc: Arnd Bergmann Cc: David S. Miller --- include/linux/reboot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 1dd1c707311f..85ea63f462af 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -67,6 +67,11 @@ extern void kernel_power_off(void); void ctrl_alt_del(void); +#define POWEROFF_CMD_PATH_LEN 256 +extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; + +extern int orderly_poweroff(bool force); + /* * Emergency restart, callable from an interrupt handler. */ -- cgit v1.2.3 From 86313c488a6848b7ec2ba04e74f25f79dd32a0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: usermodehelper: Tidy up waiting Rather than using a tri-state integer for the wait flag in call_usermodehelper_exec, define a proper enum, and use that. I've preserved the integer values so that any callers I've missed should still work OK. Signed-off-by: Jeremy Fitzhardinge Cc: James Bottomley Cc: Randy Dunlap Cc: Christoph Hellwig Cc: Andi Kleen Cc: Paul Mackerras Cc: Johannes Berg Cc: Ralf Baechle Cc: Bjorn Helgaas Cc: Joel Becker Cc: Tony Luck Cc: Kay Sievers Cc: Srivatsa Vaddagiri Cc: Oleg Nesterov Cc: David Howells --- include/linux/kmod.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index c4cbe59d9c67..5dc13848891b 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -51,15 +51,21 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, void call_usermodehelper_setcleanup(struct subprocess_info *info, void (*cleanup)(char **argv, char **envp)); +enum umh_wait { + UMH_NO_WAIT = -1, /* don't wait at all */ + UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ + UMH_WAIT_PROC = 1, /* wait for the process to complete */ +}; + /* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, int wait); +int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); /* Free the subprocess_info. This is only needed if you're not going to call call_usermodehelper_exec */ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) +call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { struct subprocess_info *info; @@ -71,7 +77,7 @@ call_usermodehelper(char *path, char **argv, char **envp, int wait) static inline int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) + struct key *session_keyring, enum umh_wait wait) { struct subprocess_info *info; -- cgit v1.2.3 From 810bab448e563ffd1718d78e9a3756806b626acc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: use elfnote.h to generate vsyscall notes. Use existing elfnote.h to generate vsyscall notes, rather than doing it locally. Changes elfnote.h a bit to suit, since this is the first asm user, and it wasn't quite right. Signed-off-by: Jeremy Fitzhardinge Cc: "Eric W. Biederman" Cc: Roland McGrath Cc: Andrew Morton --- include/linux/elfnote.h | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 9a1e0674e56c..e831759b2fb5 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -38,17 +38,25 @@ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ -#define ELFNOTE(name, type, desctype, descdata) \ -.pushsection .note.name, "",@note ; \ - .align 4 ; \ +#define ELFNOTE_START(name, type, flags) \ +.pushsection .note.name, flags,@note ; \ + .balign 4 ; \ .long 2f - 1f /* namesz */ ; \ - .long 4f - 3f /* descsz */ ; \ + .long 4484f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz #name ; \ -2:.align 4 ; \ -3:desctype descdata ; \ -4:.align 4 ; \ +2:.balign 4 ; \ +3: + +#define ELFNOTE_END \ +4484:.balign 4 ; \ .popsection ; + +#define ELFNOTE(name, type, desc) \ + ELFNOTE_START(name, type, "") \ + desc ; \ + ELFNOTE_END + #else /* !__ASSEMBLER__ */ #include /* -- cgit v1.2.3 From 5f4352fbffd6c45123dbce9e195efd54df4e177e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: Allocate and free vmalloc areas Allocate/release a chunk of vmalloc address space: alloc_vm_area reserves a chunk of address space, and makes sure all the pagetables are constructed for that address range - but no pages. free_vm_area releases the address space range. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright Cc: "Jan Beulich" Cc: "Andi Kleen" --- include/linux/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 132b260aef1e..c2b10cae5da5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -70,6 +70,10 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); extern void unmap_kernel_range(unsigned long addr, unsigned long size); +/* Allocate/destroy a 'vmalloc' VM area. */ +extern struct vm_struct *alloc_vm_area(size_t size); +extern void free_vm_area(struct vm_struct *area); + /* * Internals. Dont't use.. */ -- cgit v1.2.3 From c85b04c3749507546f6d5868976e4793e35c2ec0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:05 -0700 Subject: xen: add pinned page flag Add a new definition for PG_owner_priv_1 to define PG_pinned on Xen pagetable pages. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright --- include/linux/page-flags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ae2d79f2107e..731cd2ac3227 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -92,6 +92,7 @@ /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ +#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ #if (BITS_PER_LONG > 32) /* @@ -170,6 +171,10 @@ static inline void SetPageUptodate(struct page *page) #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) #define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) +#define PagePinned(page) test_bit(PG_pinned, &(page)->flags) +#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags) +#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags) + #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -- cgit v1.2.3 From 9f27ee595038653ddf8bca871200d39247d6f4fc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: add virtual block device driver. The block device frontend driver allows the kernel to access block devices exported exported by a virtual machine containing a physical block device driver. Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright Cc: Arjan van de Ven Cc: Greg KH Cc: Jens Axboe --- include/linux/major.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/major.h b/include/linux/major.h index 7e7c9093919a..0cb98053537a 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -158,6 +158,8 @@ #define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ #define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ +#define XENVBD_MAJOR 202 /* Xen virtual block device */ + #define MSR_MAJOR 202 #define CPUID_MAJOR 203 -- cgit v1.2.3 From 1c50dc83f9ca752b1e1b985f1ce33d2695103ffa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Jan 2007 01:18:41 -0800 Subject: [SCSI] sas_ata: ata_post_internal should abort the sas_task This patch adds a new field, lldd_task, to ata_queued_cmd so that libata users such as libsas can associate some data with a qc. The particular ambition with this patch is to associate a sas_task with a qc; that way, if libata decides to timeout a command, we can come back (in sas_ata_post_internal) and abort the sas task. One question remains: Is it necessary to reset the phy on error, or will the libata error handler take care of it? (Assuming that one is written, of course.) This patch, as it is today, works well enough to clean things up when an ATA device probe attempt fails halfway through the probe, though I'm not sure this is always the right thing to do. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 47cd2a1c5544..4abb758a0450 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -411,6 +411,7 @@ struct ata_queued_cmd { ata_qc_cb_t complete_fn; void *private_data; + void *lldd_task; }; struct ata_port_stats { -- cgit v1.2.3 From 0c8db6beb81a07147f64cffd33bd43b9e96f4f40 Mon Sep 17 00:00:00 2001 From: "Prakash, Sathya" Date: Tue, 17 Jul 2007 13:40:10 +0530 Subject: [SCSI] add PCI_VENDOR_ID macro for Brocade in pci_ids.h Adds PCI_VENDOR_ID_BROCADE macro in include/linux/pci_ids.h file. This macro is used in MPT Fusion FC drivers to support Brocade branded FC controllers signed-off-by: Sathya Prakash Signed-off-by: James Bottomley --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2c7add169539..13d36bb01a42 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2017,6 +2017,8 @@ #define PCI_VENDOR_ID_ARIMA 0x161f +#define PCI_VENDOR_ID_BROCADE 0x1657 + #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 #define PCI_DEVICE_ID_BCM1250_HT 0x0002 -- cgit v1.2.3 From 7132ab7f6e0309bb8e0424e395ba149aee0c750e Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 11 Jul 2007 11:43:07 -0500 Subject: Fix RGMII-ID handling in gianfar The TSEC/eTSEC can detect the interface to the PHY automatically, but it isn't able to detect whether the RGMII connection needs internal delay. So we need to detect that change in the device tree, propagate it to the platform data, and then check it if we're in RGMII. This fixes a bug on the 8641D HPCN board where the Vitesse PHY doesn't use the delay for RGMII. Signed-off-by: Andy Fleming --- include/linux/fsl_devices.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 695741b0e420..1831b196c70a 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -53,6 +53,7 @@ struct gianfar_platform_data { u32 bus_id; u32 phy_id; u8 mac_addr[6]; + phy_interface_t interface; }; struct gianfar_mdio_data { -- cgit v1.2.3 From b4ff4f0419ae5db83553fab79d03a89c10d540a8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 15:46:06 -0700 Subject: [NETLINK]: allocate group bitmaps dynamically Allow changing the number of groups for a netlink family after it has been created, use RCU to protect the listeners bitmap keeping netlink_has_listeners() lock-free. Signed-off-by: Johannes Berg Acked-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 2e23353c28a5..b971ddd24090 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -161,6 +161,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module); +extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -- cgit v1.2.3 From 84659eb529b33572bb3f8c94e0978bd5d084bc7e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 15:47:05 -0700 Subject: [NETLIKN]: Allow removing multicast groups. Allow kicking listeners out of a multicast group when necessary (for example if that group is going to be removed.) Signed-off-by: Johannes Berg Acked-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b971ddd24090..83d8239f0cce 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -162,6 +162,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, struct mutex *cb_mutex, struct module *module); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); +extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -- cgit v1.2.3 From 2dbba6f773d1e1e4c78f03b0dbf19790d9017693 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jul 2007 15:47:52 -0700 Subject: [GENETLINK]: Dynamic multicast groups. Introduce API to dynamically register and unregister multicast groups. Signed-off-by: Johannes Berg Acked-by: Patrick McHardy Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/genetlink.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index f7a93770e1be..7da02c93002b 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -39,6 +39,9 @@ enum { CTRL_CMD_NEWOPS, CTRL_CMD_DELOPS, CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ __CTRL_CMD_MAX, }; @@ -52,6 +55,7 @@ enum { CTRL_ATTR_HDRSIZE, CTRL_ATTR_MAXATTR, CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, __CTRL_ATTR_MAX, }; @@ -66,4 +70,13 @@ enum { #define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + #endif /* __LINUX_GENERIC_NETLINK_H */ -- cgit v1.2.3 From 60a96a59569bab85571d0089682109bd3324e896 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 8 Jul 2007 22:29:26 +0200 Subject: Driver core: accept all valid action-strings in uevent-trigger This allows the uevent file to handle any type of uevent action to be triggered by userspace instead of just the "add" uevent. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 06cbf41d32d2..aa2fe22b1baa 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -36,15 +36,24 @@ extern char uevent_helper[]; /* counter to tag the uevent, read only except for the kobject core */ extern u64 uevent_seqnum; -/* the actions here must match the proper string in lib/kobject_uevent.c */ -typedef int __bitwise kobject_action_t; +/* + * The actions here must match the index to the string array + * in lib/kobject_uevent.c + * + * Do not add new actions here without checking with the driver-core + * maintainers. Action strings are not meant to express subsystem + * or device specific properties. In most cases you want to send a + * kobject_uevent_env(kobj, KOBJ_CHANGE, env) with additional event + * specific variables added to the event environment. + */ enum kobject_action { - KOBJ_ADD = (__force kobject_action_t) 0x01, /* exclusive to core */ - KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* exclusive to core */ - KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* device state change */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* device offline */ - KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* device online */ - KOBJ_MOVE = (__force kobject_action_t) 0x06, /* device move */ + KOBJ_ADD, + KOBJ_REMOVE, + KOBJ_CHANGE, + KOBJ_MOVE, + KOBJ_ONLINE, + KOBJ_OFFLINE, + KOBJ_MAX }; struct kobject { -- cgit v1.2.3 From 3f8df781fc5f9ee5253a54ba669e1c8872844b86 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 12 Jul 2007 16:57:22 -0400 Subject: PM: remove deprecated dpm_runtime_* routines This patch (as933) removes the deprecated dpm_runtime_suspend() and dpm_runtime_resume() routines from the PM core. The only user of those routines is the PCMCIA ds driver; local replacements are added. Signed-off-by: Alan Stern CC: Dominik Brodowski Signed-off-by: Greg Kroah-Hartman --- include/linux/pm.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 273781c82e4d..2735b7cadd20 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -284,8 +284,6 @@ extern int device_prepare_suspend(pm_message_t state); #define device_may_wakeup(dev) \ (device_can_wakeup(dev) && (dev)->power.should_wakeup) -extern int dpm_runtime_suspend(struct device *, pm_message_t); -extern void dpm_runtime_resume(struct device *); extern void __suspend_report_result(const char *function, void *fn, int ret); #define suspend_report_result(fn, ret) \ @@ -317,15 +315,6 @@ static inline int device_suspend(pm_message_t state) #define device_set_wakeup_enable(dev,val) do{}while(0) #define device_may_wakeup(dev) (0) -static inline int dpm_runtime_suspend(struct device * dev, pm_message_t state) -{ - return 0; -} - -static inline void dpm_runtime_resume(struct device * dev) -{ -} - #define suspend_report_result(fn, ret) do { } while (0) static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -- cgit v1.2.3 From aebdc3b450a3febf7d7d00cd2235509055ec7082 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 12 Jul 2007 22:08:22 -0700 Subject: dev_vdbg(), available with -DVERBOSE_DEBUG This defines a dev_vdbg() call, which is enabled with -DVERBOSE_DEBUG. When enabled, dev_vdbg() acts just like dev_dbg(). When disabled, it is a NOP ... just like dev_dbg() without -DDEBUG. The specific code was moved out of a USB patch, but lots of drivers have similar support. That is, code can now be written to use an additional level of debug output, selected at compile time. Many driver authors have found this idiom to be very useful. A typical usage model is for "normal" debug messages to focus on fault paths and not be very "chatty", so that those messages can be left on during normal operation without much of a performance or syslog load. On the other hand "verbose" messages would be noisy enough that they wouldn't normally be enabled; they might even affect timings enough to change system or driver behavior. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index be2debed70d2..d9f0a57f5a2f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -572,6 +572,16 @@ dev_dbg(struct device * dev, const char * fmt, ...) } #endif +#ifdef VERBOSE_DEBUG +#define dev_vdbg dev_dbg +#else +static inline int __attribute__ ((format (printf, 2, 3))) +dev_vdbg(struct device * dev, const char * fmt, ...) +{ + return 0; +} +#endif + #define dev_err(dev, format, arg...) \ dev_printk(KERN_ERR , dev , format , ## arg) #define dev_info(dev, format, arg...) \ -- cgit v1.2.3 From beafc54c4e2fba24e1ca45cdb7f79d9aa83e3db1 Mon Sep 17 00:00:00 2001 From: "Hans J. Koch" Date: Thu, 7 Dec 2006 10:58:29 +0100 Subject: UIO: Add the User IO core code This interface allows the ability to write the majority of a driver in userspace with only a very small shell of a driver in the kernel itself. It uses a char device and sysfs to interact with a userspace process to process interrupts and control memory accesses. See the docbook documentation for more details on how to use this interface. From: Hans J. Koch Cc: Thomas Gleixner Cc: Benedikt Spranger Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 include/linux/uio_driver.h (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h new file mode 100644 index 000000000000..44c28e94df50 --- /dev/null +++ b/include/linux/uio_driver.h @@ -0,0 +1,91 @@ +/* + * include/linux/uio_driver.h + * + * Copyright(C) 2005, Benedikt Spranger + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2006, Hans J. Koch + * Copyright(C) 2006, Greg Kroah-Hartman + * + * Userspace IO driver. + * + * Licensed under the GPLv2 only. + */ + +#ifndef _UIO_DRIVER_H_ +#define _UIO_DRIVER_H_ + +#include +#include +#include + +/** + * struct uio_mem - description of a UIO memory region + * @kobj: kobject for this mapping + * @addr: address of the device's memory + * @size: size of IO + * @memtype: type of memory addr points to + * @internal_addr: ioremap-ped version of addr, for driver internal use + */ +struct uio_mem { + struct kobject kobj; + unsigned long addr; + unsigned long size; + int memtype; + void __iomem *internal_addr; +}; + +#define MAX_UIO_MAPS 5 + +struct uio_device; + +/** + * struct uio_info - UIO device capabilities + * @uio_dev: the UIO device this info belongs to + * @name: device name + * @version: device driver version + * @mem: list of mappable memory regions, size==0 for end of list + * @irq: interrupt number or UIO_IRQ_CUSTOM + * @irq_flags: flags for request_irq() + * @priv: optional private data + * @handler: the device's irq handler + * @mmap: mmap operation for this uio device + * @open: open operation for this uio device + * @release: release operation for this uio device + */ +struct uio_info { + struct uio_device *uio_dev; + char *name; + char *version; + struct uio_mem mem[MAX_UIO_MAPS]; + long irq; + unsigned long irq_flags; + void *priv; + irqreturn_t (*handler)(int irq, struct uio_info *dev_info); + int (*mmap)(struct uio_info *info, struct vm_area_struct *vma); + int (*open)(struct uio_info *info, struct inode *inode); + int (*release)(struct uio_info *info, struct inode *inode); +}; + +extern int __must_check + __uio_register_device(struct module *owner, + struct device *parent, + struct uio_info *info); +static inline int __must_check + uio_register_device(struct device *parent, struct uio_info *info) +{ + return __uio_register_device(THIS_MODULE, parent, info); +} +extern void uio_unregister_device(struct uio_info *info); +extern void uio_event_notify(struct uio_info *info); + +/* defines for uio_device->irq */ +#define UIO_IRQ_CUSTOM -1 +#define UIO_IRQ_NONE -2 + +/* defines for uio_device->memtype */ +#define UIO_MEM_NONE 0 +#define UIO_MEM_PHYS 1 +#define UIO_MEM_LOGICAL 2 +#define UIO_MEM_VIRTUAL 3 + +#endif /* _LINUX_UIO_DRIVER_H_ */ -- cgit v1.2.3 From a9933cea7a1d80dd9efae9f1acd857f5dce742b9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 7 Jun 2007 17:09:49 -0400 Subject: locks: rename lease functions to reflect locks.c conventions We've been using the convention that vfs_foo is the function that calls a filesystem-specific foo method if it exists, or falls back on a generic method if it doesn't; thus vfs_foo is what is called when some other part of the kernel (normally lockd or nfsd) wants to get a lock, whereas foo is what filesystems call to use the underlying local functionality as part of their lock implementation. So rename setlease to vfs_setlease (which will call a filesystem-specific setlease after a later patch) and __setlease to setlease. Also, vfs_setlease need only be GPL-exported as long as it's only needed by lockd and nfsd. Signed-off-by: "J. Bruce Fields" --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98205f680476..a24f029accc0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -872,7 +872,7 @@ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); extern void lease_get_mtime(struct inode *, struct timespec *time); -extern int setlease(struct file *, long, struct file_lock **); +extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -- cgit v1.2.3 From f9ffed26d6f3e6ac9988947242821579d615fda7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 Nov 2006 15:51:40 -0500 Subject: locks: provide a file lease method enabling cluster-coherent leases Currently leases are only kept locally, so there's no way for a distributed filesystem to enforce them against multiple clients. We're particularly interested in the case of nfsd exporting a cluster filesystem, in which case nfsd needs cluster-coherent leases in order to implement delegations correctly. Also add some documentation. Signed-off-by: J. Bruce Fields --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a24f029accc0..c8ddf34e9710 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1122,6 +1122,7 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **); }; struct inode_operations { -- cgit v1.2.3 From 4698afe8e3a725576366f86560a8a8242b21b9f7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 4 Jul 2007 17:21:37 -0400 Subject: locks: export setlease to filesystems Export setlease so it can used by filesystems to implement their lease methods. Signed-off-by: "J. Bruce Fields" --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c8ddf34e9710..b188c2e5338d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -872,6 +872,7 @@ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); extern void lease_get_mtime(struct inode *, struct timespec *time); +extern int setlease(struct file *, long, struct file_lock **); extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); -- cgit v1.2.3 From 6d34ac199a4af5c678a3a8f3275aeb2586b72da3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 11 May 2007 16:09:32 -0400 Subject: locks: make posix_test_lock() interface more consistent Since posix_test_lock(), like fcntl() and ->lock(), indicates absence or presence of a conflict lock by setting fl_type to, respectively, F_UNLCK or something other than F_UNLCK, the return value is no longer needed. Signed-off-by: "J. Bruce Fields" --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b188c2e5338d..80deaaf1b746 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -862,7 +862,7 @@ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern int posix_test_lock(struct file *, struct file_lock *); +extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From 5417169026c3df151adf5a65eb061278b0a72e69 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 19 Jul 2007 17:39:55 +1000 Subject: [FS] Implement block_page_mkwrite. Many filesystems need a ->page-mkwrite callout to correctly set up pages that have been written to by mmap. This is especially important when mmap is writing into holes as it allows filesystems to correctly account for and allocate space before the mmap write is allowed to proceed. Protection against truncate races is provided by locking the page and checking to see whether the page mapping is correct and whether it is beyond EOF so we don't end up allowing allocations beyond the current EOF or changing EOF as a result of a mmap write. SGI-PV: 940392 SGI-Modid: 2.6.x-xfs-melb:linux:29146a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Tim Shimmin --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 5c6e12853a9b..35cadad84b14 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -209,6 +209,8 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, + get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); -- cgit v1.2.3 From d00806b183152af6d24f46f0c33f14162ca1262a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 19 Jul 2007 01:46:57 -0700 Subject: mm: fix fault vs invalidate race for linear mappings Fix the race between invalidate_inode_pages and do_no_page. Andrea Arcangeli identified a subtle race between invalidation of pages from pagecache with userspace mappings, and do_no_page. The issue is that invalidation has to shoot down all mappings to the page, before it can be discarded from the pagecache. Between shooting down ptes to a particular page, and actually dropping the struct page from the pagecache, do_no_page from any process might fault on that page and establish a new mapping to the page just before it gets discarded from the pagecache. The most common case where such invalidation is used is in file truncation. This case was catered for by doing a sort of open-coded seqlock between the file's i_size, and its truncate_count. Truncation will decrease i_size, then increment truncate_count before unmapping userspace pages; do_no_page will read truncate_count, then find the page if it is within i_size, and then check truncate_count under the page table lock and back out and retry if it had subsequently been changed (ptl will serialise against unmapping, and ensure a potentially updated truncate_count is actually visible). Complexity and documentation issues aside, the locking protocol fails in the case where we would like to invalidate pagecache inside i_size. do_no_page can come in anytime and filemap_nopage is not aware of the invalidation in progress (as it is when it is outside i_size). The end result is that dangling (->mapping == NULL) pages that appear to be from a particular file may be mapped into userspace with nonsense data. Valid mappings to the same place will see a different page. Andrea implemented two working fixes, one using a real seqlock, another using a page->flags bit. He also proposed using the page lock in do_no_page, but that was initially considered too heavyweight. However, it is not a global or per-file lock, and the page cacheline is modified in do_no_page to increment _count and _mapcount anyway, so a further modification should not be a large performance hit. Scalability is not an issue. This patch implements this latter approach. ->nopage implementations return with the page locked if it is possible for their underlying file to be invalidated (in that case, they must set a special vm_flags bit to indicate so). do_no_page only unlocks the page after setting up the mapping completely. invalidation is excluded because it holds the page lock during invalidation of each page (and ensures that the page is not mapped while holding the lock). This also allows significant simplifications in do_no_page, because we have the page locked in the right place in the pagecache from the start. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a5c451816fdc..ca9536a348c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -168,6 +168,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated, + * eg. truncate or invalidate_inode_*. + * In this case, do_no_page must + * return with the page locked. + */ + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif -- cgit v1.2.3 From 54cb8821de07f2ffcd28c380ce9b93d5784b40d7 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 19 Jul 2007 01:46:59 -0700 Subject: mm: merge populate and nopage into fault (fixes nonlinear) Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes the virtual address -> file offset differently from linear mappings. ->populate is a layering violation because the filesystem/pagecache code should need to know anything about the virtual memory mapping. The hitch here is that the ->nopage handler didn't pass down enough information (ie. pgoff). But it is more logical to pass pgoff rather than have the ->nopage function calculate it itself anyway (because that's a similar layering violation). Having the populate handler install the pte itself is likewise a nasty thing to be doing. This patch introduces a new fault handler that replaces ->nopage and ->populate and (later) ->nopfn. Most of the old mechanism is still in place so there is a lot of duplication and nice cleanups that can be removed if everyone switches over. The rationale for doing this in the first place is that nonlinear mappings are subject to the pagefault vs invalidate/truncate race too, and it seemed stupid to duplicate the synchronisation logic rather than just consolidate the two. After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in pagecache. Seems like a fringe functionality anyway. NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no users have hit mainline yet. [akpm@linux-foundation.org: cleanup] [randy.dunlap@oracle.com: doc. fixes for readahead] [akpm@linux-foundation.org: build fix] Signed-off-by: Nick Piggin Signed-off-by: Randy Dunlap Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ca9536a348c8..f28a1b3e63a9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -173,6 +173,7 @@ extern unsigned int kobjsize(const void *objp); * In this case, do_no_page must * return with the page locked. */ +#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -196,6 +197,25 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; +#define FAULT_FLAG_WRITE 0x01 +#define FAULT_FLAG_NONLINEAR 0x02 + +/* + * fault_data is filled in the the pagefault handler and passed to the + * vma's ->fault function. That function is responsible for filling in + * 'type', which is the type of fault if a page is returned, or the type + * of error if NULL is returned. + * + * pgoff should be used in favour of address, if possible. If pgoff is + * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get + * nonlinear mapping support. + */ +struct fault_data { + unsigned long address; + pgoff_t pgoff; + unsigned int flags; + int type; +}; /* * These are the virtual MM functions - opening of an area, closing and @@ -205,9 +225,15 @@ extern pgprot_t protection_map[16]; struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); - unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); - int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); + struct page *(*fault)(struct vm_area_struct *vma, + struct fault_data *fdata); + struct page *(*nopage)(struct vm_area_struct *area, + unsigned long address, int *type); + unsigned long (*nopfn)(struct vm_area_struct *area, + unsigned long address); + int (*populate)(struct vm_area_struct *area, unsigned long address, + unsigned long len, pgprot_t prot, unsigned long pgoff, + int nonblock); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -661,7 +687,6 @@ static inline int page_mapped(struct page *page) */ #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct page *) (-1)) -#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ /* * Error return values for the *_nopfn functions @@ -1110,9 +1135,11 @@ extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); -extern int filemap_populate(struct vm_area_struct *, unsigned long, - unsigned long, pgprot_t, unsigned long, int); +extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); +extern struct page * __deprecated_for_modules +filemap_nopage(struct vm_area_struct *, unsigned long, int *); +extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *, + unsigned long, unsigned long, pgprot_t, unsigned long, int); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); -- cgit v1.2.3 From d0217ac04ca6591841e5665f518e38064f4e65bd Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 19 Jul 2007 01:47:03 -0700 Subject: mm: fault feedback #1 Change ->fault prototype. We now return an int, which contains VM_FAULT_xxx code in the low byte, and FAULT_RET_xxx code in the next byte. FAULT_RET_ code tells the VM whether a page was found, whether it has been locked, and potentially other things. This is not quite the way he wanted it yet, but that's changed in the next patch (which requires changes to arch code). This means we no longer set VM_CAN_INVALIDATE in the vma in order to say that a page is locked which requires filemap_nopage to go away (because we can no longer remain backward compatible without that flag), but we were going to do that anyway. struct fault_data is renamed to struct vm_fault as Linus asked. address is now a void __user * that we should firmly encourage drivers not to use without really good reason. The page is now returned via a page pointer in the vm_fault struct. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 84 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f28a1b3e63a9..ff0b8844bd5a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -168,12 +168,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ -#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated, - * eg. truncate or invalidate_inode_*. - * In this case, do_no_page must - * return with the page locked. - */ -#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */ +#define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -197,24 +192,44 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; -#define FAULT_FLAG_WRITE 0x01 -#define FAULT_FLAG_NONLINEAR 0x02 +#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ +#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ + + +#define FAULT_RET_NOPAGE 0x0100 /* ->fault did not return a page. This + * can be used if the handler installs + * their own pte. + */ +#define FAULT_RET_LOCKED 0x0200 /* ->fault locked the page, caller must + * unlock after installing the mapping. + * This is used by pagecache in + * particular, where the page lock is + * used to synchronise against truncate + * and invalidate. Mutually exclusive + * with FAULT_RET_NOPAGE. + */ /* - * fault_data is filled in the the pagefault handler and passed to the - * vma's ->fault function. That function is responsible for filling in - * 'type', which is the type of fault if a page is returned, or the type - * of error if NULL is returned. + * vm_fault is filled by the the pagefault handler and passed to the vma's + * ->fault function. The vma's ->fault is responsible for returning the + * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed + * with FAULT_RET_ flags that occupy the next byte and give details about + * how the fault was handled. * - * pgoff should be used in favour of address, if possible. If pgoff is - * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get - * nonlinear mapping support. + * pgoff should be used in favour of virtual_address, if possible. If pgoff + * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear + * mapping support. */ -struct fault_data { - unsigned long address; - pgoff_t pgoff; - unsigned int flags; - int type; +struct vm_fault { + unsigned int flags; /* FAULT_FLAG_xxx flags */ + pgoff_t pgoff; /* Logical page offset based on vma */ + void __user *virtual_address; /* Faulting virtual address */ + + struct page *page; /* ->fault handlers should return a + * page here, unless FAULT_RET_NOPAGE + * is set (which is also implied by + * VM_FAULT_OOM or SIGBUS). + */ }; /* @@ -225,15 +240,11 @@ struct fault_data { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - struct page *(*fault)(struct vm_area_struct *vma, - struct fault_data *fdata); + int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); struct page *(*nopage)(struct vm_area_struct *area, unsigned long address, int *type); unsigned long (*nopfn)(struct vm_area_struct *area, unsigned long address); - int (*populate)(struct vm_area_struct *area, unsigned long address, - unsigned long len, pgprot_t prot, unsigned long pgoff, - int nonblock); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -700,8 +711,14 @@ static inline int page_mapped(struct page *page) * Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. */ -#define VM_FAULT_OOM 0x00 -#define VM_FAULT_SIGBUS 0x01 + +/* + * VM_FAULT_ERROR is set for the error cases, to make some tests simpler. + */ +#define VM_FAULT_ERROR 0x20 + +#define VM_FAULT_OOM (0x00 | VM_FAULT_ERROR) +#define VM_FAULT_SIGBUS (0x01 | VM_FAULT_ERROR) #define VM_FAULT_MINOR 0x02 #define VM_FAULT_MAJOR 0x03 @@ -711,6 +728,11 @@ static inline int page_mapped(struct page *page) */ #define VM_FAULT_WRITE 0x10 +/* + * Mask of VM_FAULT_ flags + */ +#define VM_FAULT_MASK 0xff + #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) extern void show_free_areas(void); @@ -793,8 +815,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); -extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); -extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); #ifdef CONFIG_MMU extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, @@ -1135,11 +1155,7 @@ extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ -extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); -extern struct page * __deprecated_for_modules -filemap_nopage(struct vm_area_struct *, unsigned long, int *); -extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *, - unsigned long, unsigned long, pgprot_t, unsigned long, int); +extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); -- cgit v1.2.3 From 83c54070ee1a2d05c89793884bea1a03f2851ed4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 19 Jul 2007 01:47:05 -0700 Subject: mm: fault feedback #2 This patch completes Linus's wish that the fault return codes be made into bit flags, which I agree makes everything nicer. This requires requires all handle_mm_fault callers to be modified (possibly the modifications should go further and do things like fault accounting in handle_mm_fault -- however that would be for another patch). [akpm@linux-foundation.org: fix alpha build] [akpm@linux-foundation.org: fix s390 build] [akpm@linux-foundation.org: fix sparc build] [akpm@linux-foundation.org: fix sparc64 build] [akpm@linux-foundation.org: fix ia64 build] Signed-off-by: Nick Piggin Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Russell King Cc: Ian Molton Cc: Bryan Wu Cc: Mikael Starvik Cc: David Howells Cc: Yoshinori Sato Cc: "Luck, Tony" Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: Greg Ungerer Cc: Matthew Wilcox Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Paul Mundt Cc: Kazumoto Kojima Cc: Richard Curnow Cc: William Lee Irwin III Cc: "David S. Miller" Cc: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Cc: Miles Bader Cc: Chris Zankel Acked-by: Kyle McMartin Acked-by: Haavard Skinnemoen Acked-by: Ralf Baechle Acked-by: Andi Kleen Signed-off-by: Andrew Morton [ Still apparently needs some ARM and PPC loving - Linus ] Signed-off-by: Linus Torvalds --- include/linux/mm.h | 58 ++++++++++++------------------------------------------ 1 file changed, 13 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ff0b8844bd5a..f8e12b3b6110 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -196,25 +196,10 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ -#define FAULT_RET_NOPAGE 0x0100 /* ->fault did not return a page. This - * can be used if the handler installs - * their own pte. - */ -#define FAULT_RET_LOCKED 0x0200 /* ->fault locked the page, caller must - * unlock after installing the mapping. - * This is used by pagecache in - * particular, where the page lock is - * used to synchronise against truncate - * and invalidate. Mutually exclusive - * with FAULT_RET_NOPAGE. - */ - /* * vm_fault is filled by the the pagefault handler and passed to the vma's - * ->fault function. The vma's ->fault is responsible for returning the - * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed - * with FAULT_RET_ flags that occupy the next byte and give details about - * how the fault was handled. + * ->fault function. The vma's ->fault is responsible for returning a bitmask + * of VM_FAULT_xxx flags that give details about how the fault was handled. * * pgoff should be used in favour of virtual_address, if possible. If pgoff * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear @@ -226,9 +211,9 @@ struct vm_fault { void __user *virtual_address; /* Faulting virtual address */ struct page *page; /* ->fault handlers should return a - * page here, unless FAULT_RET_NOPAGE + * page here, unless VM_FAULT_NOPAGE * is set (which is also implied by - * VM_FAULT_OOM or SIGBUS). + * VM_FAULT_ERROR). */ }; @@ -712,26 +697,17 @@ static inline int page_mapped(struct page *page) * just gets major/minor fault counters bumped up. */ -/* - * VM_FAULT_ERROR is set for the error cases, to make some tests simpler. - */ -#define VM_FAULT_ERROR 0x20 +#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ -#define VM_FAULT_OOM (0x00 | VM_FAULT_ERROR) -#define VM_FAULT_SIGBUS (0x01 | VM_FAULT_ERROR) -#define VM_FAULT_MINOR 0x02 -#define VM_FAULT_MAJOR 0x03 +#define VM_FAULT_OOM 0x0001 +#define VM_FAULT_SIGBUS 0x0002 +#define VM_FAULT_MAJOR 0x0004 +#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ -/* - * Special case for get_user_pages. - * Must be in a distinct bit from the above VM_FAULT_ flags. - */ -#define VM_FAULT_WRITE 0x10 +#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ +#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -/* - * Mask of VM_FAULT_ flags - */ -#define VM_FAULT_MASK 0xff +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) @@ -817,16 +793,8 @@ extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); #ifdef CONFIG_MMU -extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, +extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access); - -static inline int handle_mm_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - int write_access) -{ - return __handle_mm_fault(mm, vma, address, write_access) & - (~VM_FAULT_WRITE); -} #else static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, -- cgit v1.2.3 From bb2d5ce16409efcdf94017a6b6fecd468226e29c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 19 Jul 2007 01:47:23 -0700 Subject: Remove alloc_zeroed_user_highpage() alloc_zeroed_user_highpage() has no in-tree users and it is not exported. As it is not exported, it can simply be removed. Signed-off-by: Mel Gorman Acked-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 12c5e4e3135a..1fcb0033179e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -102,21 +102,6 @@ __alloc_zeroed_user_highpage(gfp_t movableflags, } #endif -/** - * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA - * @vma: The VMA the page is to be allocated for - * @vaddr: The virtual address the page will be inserted into - * - * This function will allocate a page for a VMA that the caller knows will - * not be able to move in the future using move_pages() or reclaim. If it - * is known that the page can move, use alloc_zeroed_user_highpage_movable - */ -static inline struct page * -alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) -{ - return __alloc_zeroed_user_highpage(0, vma, vaddr); -} - /** * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move * @vma: The VMA the page is to be allocated for -- cgit v1.2.3 From a634cc10164d1c229fbeca33923e6a0ed939e894 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:30 -0700 Subject: swsusp: introduce restore platform operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least on some machines it is necessary to prepare the ACPI firmware for the restoration of the system memory state from the hibernation image if the "platform" mode of hibernation has been used. Namely, in that cases we need to disable the GPEs before replacing the "boot" kernel with the "frozen" kernel (cf. http://bugzilla.kernel.org/show_bug.cgi?id=7887). After the restore they will be re-enabled by hibernation_ops->finish(), but if the restore fails, they have to be re-enabled by the restore code explicitly. For this purpose we can introduce two additional hibernation operations, called pre_restore() and restore_cleanup() and call them from the restore code path. Still, they should be called if the "platform" mode of hibernation has been used, so we need to pass the information about the hibernation mode from the "frozen" kernel to the "boot" kernel in the image header. Apparently, we can't drop the disabling of GPEs before the restore because of Bug #7887 .  We also can't do it unconditionally, because the GPEs wouldn't have been enabled after a successful restore if the suspend had been done in the 'shutdown' or 'reboot' mode. In principle we could (and probably should) unconditionally disable the GPEs before each snapshot creation *and* before the restore, but then we'd have to unconditionally enable them after the snapshot creation as well as after the restore (or restore failure)   Still, for this purpose we'd need to modify acpi_enter_sleep_state_prep() and acpi_leave_sleep_state() and we'd have to introduce some mechanism synchronizing the disablind/enabling of the GPEs with the device drivers' .suspend()/.resume() routines and with disable_/enable_nonboot_cpus().  However, this would have affected the suspend (ie. s2ram) code as well as the hibernation, which I'd like to avoid in this patch series. Signed-off-by: Rafael J. Wysocki Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 9c7cb6430666..d235c146da2b 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -43,11 +43,15 @@ static inline void pm_restore_console(void) {} * @prepare: prepare system for hibernation * @enter: shut down system after state has been saved to disk * @finish: finish/clean up after state has been reloaded + * @pre_restore: prepare system for the restoration from a hibernation image + * @restore_cleanup: clean up after a failing image restoration */ struct hibernation_ops { int (*prepare)(void); int (*enter)(void); void (*finish)(void); + int (*pre_restore)(void); + void (*restore_cleanup)(void); }; #if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) -- cgit v1.2.3 From 0c1eecfb345401629aa57c9d3b077273e56c45a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:33 -0700 Subject: Freezer: avoid freezing kernel threads prematurely Kernel threads should not have TIF_FREEZE set when user space processes are being frozen, since otherwise some of them might be frozen prematurely. To prevent this from happening we can (1) make exit_mm() unset TIF_FREEZE unconditionally just after clearing tsk->mm and (2) make try_to_freeze_tasks() check if p->mm is different from zero and PF_BORROWED_MM is unset in p->flags when user space processes are to be frozen. Namely, when user space processes are being frozen, we only should set TIF_FREEZE for tasks that have p->mm different from NULL and don't have PF_BORROWED_MM set in p->flags. For this reason task_lock() must be used to prevent try_to_freeze_tasks() from racing with use_mm()/unuse_mm(), in which p->mm and p->flags.PF_BORROWED_MM are changed under task_lock(p). Also, we need to prevent the following scenario from happening: * daemonize() is called by a task spawned from a user space code path * freezer checks if the task has p->mm set and the result is positive * task enters exit_mm() and clears its TIF_FREEZE * freezer sets TIF_FREEZE for the task * task calls try_to_freeze() and goes to the refrigerator, which is wrong at that point This requires us to acquire task_lock(p) before p->flags.PF_BORROWED_MM and p->mm are examined and release it after TIF_FREEZE is set for p (or it turns out that TIF_FREEZE should not be set). Signed-off-by: Rafael J. Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Cc: Nigel Cunningham Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 2d38b1a74662..c8e02de737f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -25,7 +25,7 @@ static inline int freezing(struct task_struct *p) /* * Request that a process be frozen */ -static inline void freeze(struct task_struct *p) +static inline void set_freeze_flag(struct task_struct *p) { set_tsk_thread_flag(p, TIF_FREEZE); } @@ -33,7 +33,7 @@ static inline void freeze(struct task_struct *p) /* * Sometimes we may need to cancel the previous 'freeze' request */ -static inline void do_not_freeze(struct task_struct *p) +static inline void clear_freeze_flag(struct task_struct *p) { clear_tsk_thread_flag(p, TIF_FREEZE); } @@ -56,7 +56,7 @@ static inline int thaw_process(struct task_struct *p) wake_up_process(p); return 1; } - clear_tsk_thread_flag(p, TIF_FREEZE); + clear_freeze_flag(p); task_unlock(p); return 0; } @@ -129,7 +129,8 @@ static inline void set_freezable(void) #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } -static inline void freeze(struct task_struct *p) { BUG(); } +static inline void set_freeze_flag(struct task_struct *p) {} +static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } static inline void refrigerator(void) {} -- cgit v1.2.3 From b10d911749d37dccfa5873d2088aea3f074b9e45 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:36 -0700 Subject: PM: introduce hibernation and suspend notifiers Make it possible to register hibernation and suspend notifiers, so that subsystems can perform hibernation-related or suspend-related operations that should not be carried out by device drivers' .suspend() and .resume() routines. [akpm@linux-foundation.org: build fixes] [akpm@linux-foundation.org: cleanups] Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 6 ++++++ include/linux/suspend.h | 48 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 576f2bb34cc8..be3f2bb6fcf3 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -212,5 +212,11 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN) +/* Hibernation and suspend events */ +#define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ +#define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ +#define PM_SUSPEND_PREPARE 0x0003 /* Going to suspend the system */ +#define PM_POST_SUSPEND 0x0004 /* Suspend finished */ + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d235c146da2b..e8e6da394c92 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -54,7 +54,8 @@ struct hibernation_ops { void (*restore_cleanup)(void); }; -#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) +#ifdef CONFIG_PM +#ifdef CONFIG_SOFTWARE_SUSPEND /* kernel/power/snapshot.c */ extern void __register_nosave_region(unsigned long b, unsigned long e, int km); static inline void register_nosave_region(unsigned long b, unsigned long e) @@ -72,16 +73,14 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct hibernation_ops *ops); extern int hibernate(void); -#else -static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} +#else /* CONFIG_SOFTWARE_SUSPEND */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } -#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */ +#endif /* CONFIG_SOFTWARE_SUSPEND */ void save_processor_state(void); void restore_processor_state(void); @@ -89,4 +88,43 @@ struct saved_context; void __save_processor_state(struct saved_context *ctxt); void __restore_processor_state(struct saved_context *ctxt); +/* kernel/power/main.c */ +extern struct blocking_notifier_head pm_chain_head; + +static inline int register_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&pm_chain_head, nb); +} + +static inline int unregister_pm_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&pm_chain_head, nb); +} + +#define pm_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = pri }; \ + register_pm_notifier(&fn##_nb); \ +} +#else /* CONFIG_PM */ + +static inline int register_pm_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_pm_notifier(struct notifier_block *nb) +{ + return 0; +} + +#define pm_notifier(fn, pri) do { (void)(fn); } while (0) +#endif /* CONFIG_PM */ + +#if !defined CONFIG_SOFTWARE_SUSPEND || !defined(CONFIG_PM) +static inline void register_nosave_region(unsigned long b, unsigned long e) +{ +} +#endif + #endif /* _LINUX_SWSUSP_H */ -- cgit v1.2.3 From bd804eba1c8597cbb7cd5a5f9fe886aae16a079a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 19 Jul 2007 01:47:40 -0700 Subject: PM: Introduce pm_power_off_prepare Introduce the pm_power_off_prepare() callback that can be registered by the interested platforms in analogy with pm_idle() and pm_power_off(), used for preparing the system to power off (needed by ACPI). This allows us to drop acpi_sysclass and device_acpi that are only defined in order to register the ACPI power off preparation callback, which is needed by pm_power_off() registered in a much different way. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 2735b7cadd20..ad3cc2eb0d34 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -101,6 +101,7 @@ struct pm_dev */ extern void (*pm_idle)(void); extern void (*pm_power_off)(void); +extern void (*pm_power_off_prepare)(void); typedef int __bitwise suspend_state_t; -- cgit v1.2.3 From 5a60d6235c8352ade8f2699e72fcdfe853730456 Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Thu, 19 Jul 2007 01:47:41 -0700 Subject: PM: Optional beeping during resume from suspend to RAM Add a feature allowing the user to make the system beep during a resume from suspend to RAM, on x86_64 and i386. This is useful for the users with broken resume from RAM, so that they can verify if the control reaches the kernel after a wake-up event. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fccd8b548d93..c0ccdd720363 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -123,6 +123,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_video_flags; +extern unsigned long s2ram_beep; #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From 77afcf78a2ded9a91838734234949c0ead5feb12 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 19 Jul 2007 01:47:41 -0700 Subject: PM: Integrate beeping flag with existing acpi_sleep flags Move "debug during resume from s2ram" into the variable we already use for real-mode flags to simplify code. It also closes nasty trap for the user in acpi_sleep_setup; order of parameters actually mattered there, acpi_sleep=s3_bios,s3_mode doing something different from acpi_sleep=s3_mode,s3_bios. Signed-off-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c0ccdd720363..dc234c508a6f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -122,8 +122,7 @@ extern struct acpi_mcfg_allocation *pci_mmcfg_config; extern int pci_mmcfg_config_num; extern int sbf_port; -extern unsigned long acpi_video_flags; -extern unsigned long s2ram_beep; +extern unsigned long acpi_realmode_flags; #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From e53252d97e670a38b1d2e9723b48077bba11ddda Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 19 Jul 2007 01:47:51 -0700 Subject: unregister_chrdev() return void unregister_chrdev() does not return meaningful value. This patch makes it return void like most unregister_* functions. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9562a59b3703..75dd16efc9b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1463,7 +1463,7 @@ extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int register_chrdev(unsigned int, const char *, const struct file_operations *); -extern int unregister_chrdev(unsigned int, const char *); +extern void unregister_chrdev(unsigned int, const char *); extern void unregister_chrdev_region(dev_t, unsigned); extern int chrdev_open(struct inode *, struct file *); extern void chrdev_show(struct seq_file *,off_t); -- cgit v1.2.3 From 2ba2d00363975242dee9bb22cf798b487e3cd61e Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 19 Jul 2007 01:47:55 -0700 Subject: AIO sparse fix (type of ki_flags) Fix type issue reported by latest 'sparse': kiocb.ki_flags should be "unsigned long" (not "long"), to match bitop type signature. Signed-off-by: David Brownell Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index b903fc02bdb7..d10e608f232d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -86,7 +86,7 @@ struct kioctx; */ struct kiocb { struct list_head ki_run_list; - long ki_flags; + unsigned long ki_flags; int ki_users; unsigned ki_key; /* id of this request */ -- cgit v1.2.3 From d77c2d7cc5126639a47d73300b40d461f2811a0f Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:47:55 -0700 Subject: readahead: introduce PG_readahead Introduce a new page flag: PG_readahead. It acts as a look-ahead mark, which tells the page reader: Hey, it's time to invoke the read-ahead logic. For the sake of I/O pipelining, don't wait until it runs out of cached pages! Signed-off-by: Fengguang Wu Cc: Steven Pratt Cc: Ram Pai Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 731cd2ac3227..709d92fd2877 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -83,6 +83,7 @@ #define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ +#define PG_readahead 13 /* Reminder to do async read-ahead */ #define PG_compound 14 /* Part of a compound page */ #define PG_swapcache 15 /* Swap page: swp_entry_t in private */ @@ -226,6 +227,10 @@ static inline void SetPageUptodate(struct page *page) #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) +#define PageReadahead(page) test_bit(PG_readahead, &(page)->flags) +#define SetPageReadahead(page) set_bit(PG_readahead, &(page)->flags) +#define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags) + #define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) #define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) #define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) -- cgit v1.2.3 From 5ce1110b92b31d079aa443e967f43a2294e01194 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:47:59 -0700 Subject: readahead: data structure and routines Extend struct file_ra_state to support the on-demand readahead logic. Also define some helpers for it. Signed-off-by: Fengguang Wu Cc: Steven Pratt Cc: Ram Pai Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 75dd16efc9b6..9a5f562abc77 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -695,6 +695,10 @@ struct fown_struct { /* * Track a single file's readahead state + * + * ================#============|==================#==================| + * ^ ^ ^ ^ + * file_ra_state.la_index .ra_index .lookahead_index .readahead_index */ struct file_ra_state { unsigned long start; /* Current window */ @@ -704,6 +708,12 @@ struct file_ra_state { unsigned long prev_index; /* Cache last read() position */ unsigned long ahead_start; /* Ahead window */ unsigned long ahead_size; + + pgoff_t la_index; /* enqueue time */ + pgoff_t ra_index; /* begin offset */ + pgoff_t lookahead_index; /* time to do next readahead */ + pgoff_t readahead_index; /* end offset */ + unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ @@ -712,6 +722,60 @@ struct file_ra_state { #define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */ #define RA_FLAG_INCACHE 0x02 /* file is already in cache */ +/* + * Measuring read-ahead sizes. + * + * |----------- readahead size ------------>| + * ===#============|==================#=====================| + * |------- invoke interval ------>|-- lookahead size -->| + */ +static inline unsigned long ra_readahead_size(struct file_ra_state *ra) +{ + return ra->readahead_index - ra->ra_index; +} + +static inline unsigned long ra_lookahead_size(struct file_ra_state *ra) +{ + return ra->readahead_index - ra->lookahead_index; +} + +static inline unsigned long ra_invoke_interval(struct file_ra_state *ra) +{ + return ra->lookahead_index - ra->la_index; +} + +/* + * Check if @index falls in the readahead windows. + */ +static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) +{ + return (index >= ra->la_index && + index < ra->readahead_index); +} + +/* + * Where is the old read-ahead and look-ahead? + */ +static inline void ra_set_index(struct file_ra_state *ra, + pgoff_t la_index, pgoff_t ra_index) +{ + ra->la_index = la_index; + ra->ra_index = ra_index; +} + +/* + * Where is the new read-ahead and look-ahead? + */ +static inline void ra_set_size(struct file_ra_state *ra, + unsigned long ra_size, unsigned long la_size) +{ + ra->readahead_index = ra->ra_index + ra_size; + ra->lookahead_index = ra->ra_index + ra_size - la_size; +} + +unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp); + struct file { /* * fu_list becomes invalid after file_free is called and queued via -- cgit v1.2.3 From 122a21d11cbfda6d1e33cbc8ae9e4c4ee2f1886e Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:48:01 -0700 Subject: readahead: on-demand readahead logic This is a minimal readahead algorithm that aims to replace the current one. It is more flexible and reliable, while maintaining almost the same behavior and performance. Also it is full integrated with adaptive readahead. It is designed to be called on demand: - on a missing page, to do synchronous readahead - on a lookahead page, to do asynchronous readahead In this way it eliminated the awkward workarounds for cache hit/miss, readahead thrashing, retried read, and unaligned read. It also adopts the data structure introduced by adaptive readahead, parameterizes readahead pipelining with `lookahead_index', and reduces the current/ahead windows to one single window. HEURISTICS The logic deals with four cases: - sequential-next found a consistent readahead window, so push it forward - random standalone small read, so read as is - sequential-first create a new readahead window for a sequential/oversize request - lookahead-clueless hit a lookahead page not associated with the readahead window, so create a new readahead window and ramp it up In each case, three parameters are determined: - readahead index: where the next readahead begins - readahead size: how much to readahead - lookahead size: when to do the next readahead (for pipelining) BEHAVIORS The old behaviors are maximally preserved for trivial sequential/random reads. Notable changes are: - It no longer imposes strict sequential checks. It might help some interleaved cases, and clustered random reads. It does introduce risks of a random lookahead hit triggering an unexpected readahead. But in general it is more likely to do good than to do evil. - Interleaved reads are supported in a minimal way. Their chances of being detected and proper handled are still low. - Readahead thrashings are better handled. The current readahead leads to tiny average I/O sizes, because it never turn back for the thrashed pages. They have to be fault in by do_generic_mapping_read() one by one. Whereas the on-demand readahead will redo readahead for them. OVERHEADS The new code reduced the overheads of - excessively calling the readahead routine on small sized reads (the current readahead code insists on seeing all requests) - doing a lot of pointless page-cache lookups for small cached files (the current readahead only turns itself off after 256 cache hits, unfortunately most files are < 1MB, so never see that chance) That accounts for speedup of - 0.3% on 1-page sequential reads on sparse file - 1.2% on 1-page cache hot sequential reads - 3.2% on 256-page cache hot sequential reads - 1.3% on cache hot `tar /lib` However, it does introduce one extra page-cache lookup per cache miss, which impacts random reads slightly. That's 1% overheads for 1-page random reads on sparse file. PERFORMANCE The basic benchmark setup is - 2.6.20 kernel with on-demand readahead - 1MB max readahead size - 2.9GHz Intel Core 2 CPU - 2GB memory - 160G/8M Hitachi SATA II 7200 RPM disk The benchmarks show that - it maintains the same performance for trivial sequential/random reads - sysbench/OLTP performance on MySQL gains up to 8% - performance on readahead thrashing gains up to 3 times iozone throughput (KB/s): roughly the same ========================================== iozone -c -t1 -s 4096m -r 64k 2.6.20 on-demand gain first run " Initial write " 61437.27 64521.53 +5.0% " Rewrite " 47893.02 48335.20 +0.9% " Read " 62111.84 62141.49 +0.0% " Re-read " 62242.66 62193.17 -0.1% " Reverse Read " 50031.46 49989.79 -0.1% " Stride read " 8657.61 8652.81 -0.1% " Random read " 13914.28 13898.23 -0.1% " Mixed workload " 19069.27 19033.32 -0.2% " Random write " 14849.80 14104.38 -5.0% " Pwrite " 62955.30 65701.57 +4.4% " Pread " 62209.99 62256.26 +0.1% second run " Initial write " 60810.31 66258.69 +9.0% " Rewrite " 49373.89 57833.66 +17.1% " Read " 62059.39 62251.28 +0.3% " Re-read " 62264.32 62256.82 -0.0% " Reverse Read " 49970.96 50565.72 +1.2% " Stride read " 8654.81 8638.45 -0.2% " Random read " 13901.44 13949.91 +0.3% " Mixed workload " 19041.32 19092.04 +0.3% " Random write " 14019.99 14161.72 +1.0% " Pwrite " 64121.67 68224.17 +6.4% " Pread " 62225.08 62274.28 +0.1% In summary, writes are unstable, reads are pretty close on average: access pattern 2.6.20 on-demand gain Read 62085.61 62196.38 +0.2% Re-read 62253.49 62224.99 -0.0% Reverse Read 50001.21 50277.75 +0.6% Stride read 8656.21 8645.63 -0.1% Random read 13907.86 13924.07 +0.1% Mixed workload 19055.29 19062.68 +0.0% Pread 62217.53 62265.27 +0.1% aio-stress: roughly the same ============================ aio-stress -l -s4096 -r128 -t1 -o1 knoppix511-dvd-cn.iso aio-stress -l -s4096 -r128 -t1 -o3 knoppix511-dvd-cn.iso 2.6.20 on-demand delta sequential 92.57s 92.54s -0.0% random 311.87s 312.15s +0.1% sysbench fileio: roughly the same ================================= sysbench --test=fileio --file-io-mode=async --file-test-mode=rndrw \ --file-total-size=4G --file-block-size=64K \ --num-threads=001 --max-requests=10000 --max-time=900 run threads 2.6.20 on-demand delta first run 1 59.1974s 59.2262s +0.0% 2 58.0575s 58.2269s +0.3% 4 48.0545s 47.1164s -2.0% 8 41.0684s 41.2229s +0.4% 16 35.8817s 36.4448s +1.6% 32 32.6614s 32.8240s +0.5% 64 23.7601s 24.1481s +1.6% 128 24.3719s 23.8225s -2.3% 256 23.2366s 22.0488s -5.1% second run 1 59.6720s 59.5671s -0.2% 8 41.5158s 41.9541s +1.1% 64 25.0200s 23.9634s -4.2% 256 22.5491s 20.9486s -7.1% Note that the numbers are not very stable because of the writes. The overall performance is close when we sum all seconds up: sum all up 495.046s 491.514s -0.7% sysbench oltp (trans/sec): up to 8% gain ======================================== sysbench --test=oltp --oltp-table-size=10000000 --oltp-read-only \ --mysql-socket=/var/run/mysqld/mysqld.sock \ --mysql-user=root --mysql-password=readahead \ --num-threads=064 --max-requests=10000 --max-time=900 run 10000-transactions run threads 2.6.20 on-demand gain 1 62.81 64.56 +2.8% 2 67.97 70.93 +4.4% 4 81.81 85.87 +5.0% 8 94.60 97.89 +3.5% 16 99.07 104.68 +5.7% 32 95.93 104.28 +8.7% 64 96.48 103.68 +7.5% 5000-transactions run 1 48.21 48.65 +0.9% 8 68.60 70.19 +2.3% 64 70.57 74.72 +5.9% 2000-transactions run 1 37.57 38.04 +1.3% 2 38.43 38.99 +1.5% 4 45.39 46.45 +2.3% 8 51.64 52.36 +1.4% 16 54.39 55.18 +1.5% 32 52.13 54.49 +4.5% 64 54.13 54.61 +0.9% That's interesting results. Some investigations show that - MySQL is accessing the db file non-uniformly: some parts are more hot than others - It is mostly doing 4-page random reads, and sometimes doing two reads in a row, the latter one triggers a 16-page readahead. - The on-demand readahead leaves many lookahead pages (flagged PG_readahead) there. Many of them will be hit, and trigger more readahead pages. Which might save more seeks. - Naturally, the readahead windows tend to lie in hot areas, and the lookahead pages in hot areas is more likely to be hit. - The more overall read density, the more possible gain. That also explains the adaptive readahead tricks for clustered random reads. readahead thrashing: 3 times better =================================== We boot kernel with "mem=128m single", and start a 100KB/s stream on every second, until reaching 200 streams. max throughput min avg I/O size 2.6.20: 5MB/s 16KB on-demand: 15MB/s 140KB Signed-off-by: Fengguang Wu Cc: Steven Pratt Cc: Ram Pai Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f8e12b3b6110..619c0e80cf0c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1138,6 +1138,12 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); +unsigned long page_cache_readahead_ondemand(struct address_space *mapping, + struct file_ra_state *ra, + struct file *filp, + struct page *page, + pgoff_t offset, + unsigned long size); unsigned long page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *filp, -- cgit v1.2.3 From c743d96b6d2ff55a94df7b5ac7c74987bb9c343b Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:48:04 -0700 Subject: readahead: remove the old algorithm Remove the old readahead algorithm. Signed-off-by: Fengguang Wu Cc: Steven Pratt Cc: Ram Pai Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 11 +---------- include/linux/mm.h | 7 ------- 2 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a5f562abc77..29cb32d3a849 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -701,14 +701,6 @@ struct fown_struct { * file_ra_state.la_index .ra_index .lookahead_index .readahead_index */ struct file_ra_state { - unsigned long start; /* Current window */ - unsigned long size; - unsigned long flags; /* ra flags RA_FLAG_xxx*/ - unsigned long cache_hit; /* cache hit count*/ - unsigned long prev_index; /* Cache last read() position */ - unsigned long ahead_start; /* Ahead window */ - unsigned long ahead_size; - pgoff_t la_index; /* enqueue time */ pgoff_t ra_index; /* begin offset */ pgoff_t lookahead_index; /* time to do next readahead */ @@ -717,10 +709,9 @@ struct file_ra_state { unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ + unsigned long prev_index; /* Cache last read() position */ unsigned int prev_offset; /* Offset where last read() ended in a page */ }; -#define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */ -#define RA_FLAG_INCACHE 0x02 /* file is already in cache */ /* * Measuring read-ahead sizes. diff --git a/include/linux/mm.h b/include/linux/mm.h index 619c0e80cf0c..3d0d7d285237 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1144,13 +1144,6 @@ unsigned long page_cache_readahead_ondemand(struct address_space *mapping, struct page *page, pgoff_t offset, unsigned long size); -unsigned long page_cache_readahead(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - pgoff_t offset, - unsigned long size); -void handle_ra_miss(struct address_space *mapping, - struct file_ra_state *ra, pgoff_t offset); unsigned long max_sane_readahead(unsigned long nr); /* Do stack extension */ -- cgit v1.2.3 From fe3cba17c49471e99d3421e675fc8b3deaaf0b70 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:48:07 -0700 Subject: mm: share PG_readahead and PG_reclaim Share the same page flag bit for PG_readahead and PG_reclaim. One is used only on file reads, another is only for emergency writes. One is used mostly for fresh/young pages, another is for old pages. Combinations of possible interactions are: a) clear PG_reclaim => implicit clear of PG_readahead it will delay an asynchronous readahead into a synchronous one it actually does _good_ for readahead: the pages will be reclaimed soon, it's readahead thrashing! in this case, synchronous readahead makes more sense. b) clear PG_readahead => implicit clear of PG_reclaim one(and only one) page will not be reclaimed in time it can be avoided by checking PageWriteback(page) in readahead first c) set PG_reclaim => implicit set of PG_readahead will confuse readahead and make it restart the size rampup process it's a trivial problem, and can mostly be avoided by checking PageWriteback(page) first in readahead d) set PG_readahead => implicit set of PG_reclaim PG_readahead will never be set on already cached pages. PG_reclaim will always be cleared on dirtying a page. so not a problem. In summary, a) we get better behavior b,d) possible interactions can be avoided c) racy condition exists that might affect readahead, but the chance is _really_ low, and the hurt on readahead is trivial. Compound pages also use PG_reclaim, but for now they do not interact with reclaim/readahead code. Signed-off-by: Fengguang Wu Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 709d92fd2877..a454176c3e30 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -83,7 +83,6 @@ #define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ -#define PG_readahead 13 /* Reminder to do async read-ahead */ #define PG_compound 14 /* Part of a compound page */ #define PG_swapcache 15 /* Swap page: swp_entry_t in private */ @@ -91,6 +90,9 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_buddy 19 /* Page is free, on buddy lists */ +/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +#define PG_readahead PG_reclaim /* Reminder to do async read-ahead */ + /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ -- cgit v1.2.3 From cf914a7d656e62b9dd3e0dffe4f62b953ae6048d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 19 Jul 2007 01:48:08 -0700 Subject: readahead: split ondemand readahead interface into two functions Split ondemand readahead interface into two functions. I think this makes it a little clearer for non-readahead experts (like Rusty). Internally they both call ondemand_readahead(), but the page argument is changed to an obvious boolean flag. Signed-off-by: Rusty Russell Signed-off-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d0d7d285237..50a0ed1d1806 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1138,12 +1138,20 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); int force_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read); -unsigned long page_cache_readahead_ondemand(struct address_space *mapping, - struct file_ra_state *ra, - struct file *filp, - struct page *page, - pgoff_t offset, - unsigned long size); + +void page_cache_sync_readahead(struct address_space *mapping, + struct file_ra_state *ra, + struct file *filp, + pgoff_t offset, + unsigned long size); + +void page_cache_async_readahead(struct address_space *mapping, + struct file_ra_state *ra, + struct file *filp, + struct page *pg, + pgoff_t offset, + unsigned long size); + unsigned long max_sane_readahead(unsigned long nr); /* Do stack extension */ -- cgit v1.2.3 From f9acc8c7b35a100f3a9e0e6977f7807b0169f9a5 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Thu, 19 Jul 2007 01:48:08 -0700 Subject: readahead: sanify file_ra_state names Rename some file_ra_state variables and remove some accessors. It results in much simpler code. Kudos to Rusty! Signed-off-by: Fengguang Wu Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 61 ++++++------------------------------------------------ 1 file changed, 6 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 29cb32d3a849..d33beadd9a43 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -695,16 +695,12 @@ struct fown_struct { /* * Track a single file's readahead state - * - * ================#============|==================#==================| - * ^ ^ ^ ^ - * file_ra_state.la_index .ra_index .lookahead_index .readahead_index */ struct file_ra_state { - pgoff_t la_index; /* enqueue time */ - pgoff_t ra_index; /* begin offset */ - pgoff_t lookahead_index; /* time to do next readahead */ - pgoff_t readahead_index; /* end offset */ + pgoff_t start; /* where readahead started */ + unsigned long size; /* # of readahead pages */ + unsigned long async_size; /* do asynchronous readahead when + there are only # of pages ahead */ unsigned long ra_pages; /* Maximum readahead window */ unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ @@ -713,60 +709,15 @@ struct file_ra_state { unsigned int prev_offset; /* Offset where last read() ended in a page */ }; -/* - * Measuring read-ahead sizes. - * - * |----------- readahead size ------------>| - * ===#============|==================#=====================| - * |------- invoke interval ------>|-- lookahead size -->| - */ -static inline unsigned long ra_readahead_size(struct file_ra_state *ra) -{ - return ra->readahead_index - ra->ra_index; -} - -static inline unsigned long ra_lookahead_size(struct file_ra_state *ra) -{ - return ra->readahead_index - ra->lookahead_index; -} - -static inline unsigned long ra_invoke_interval(struct file_ra_state *ra) -{ - return ra->lookahead_index - ra->la_index; -} - /* * Check if @index falls in the readahead windows. */ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) { - return (index >= ra->la_index && - index < ra->readahead_index); -} - -/* - * Where is the old read-ahead and look-ahead? - */ -static inline void ra_set_index(struct file_ra_state *ra, - pgoff_t la_index, pgoff_t ra_index) -{ - ra->la_index = la_index; - ra->ra_index = ra_index; + return (index >= ra->start && + index < ra->start + ra->size); } -/* - * Where is the new read-ahead and look-ahead? - */ -static inline void ra_set_size(struct file_ra_state *ra, - unsigned long ra_size, unsigned long la_size) -{ - ra->readahead_index = ra->ra_index + ra_size; - ra->lookahead_index = ra->ra_index + ra_size - la_size; -} - -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, struct file *filp); - struct file { /* * fu_list becomes invalid after file_free is called and queued via -- cgit v1.2.3 From 81eae375eceba481ca4c605d42913871f093f6d5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Jul 2007 01:48:09 -0700 Subject: jprobes: make struct jprobe.entry a void * Currently jprobe.entry is a kprobe_opcode_t *, but that's a lie. On some platforms it doesn't point to an opcode at all, it points to a function descriptor. It's really a pointer to something that the arch code can turn into a function entry point. And that's what actually happens, none of the generic code ever looks at jprobe.entry, it's only ever dereferenced by arch code. So just make it a void *. Signed-off-by: Michael Ellerman Cc: Prasanna S Panchamukhi Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 23adf6075ae4..f4e53b71d23f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -116,7 +116,7 @@ struct kprobe { */ struct jprobe { struct kprobe kp; - kprobe_opcode_t *entry; /* probe handling code to jump to */ + void *entry; /* probe handling code to jump to */ }; DECLARE_PER_CPU(struct kprobe *, current_kprobe); -- cgit v1.2.3 From 9e367d859297b9377d65574f538cf52730e9eda8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Jul 2007 01:48:10 -0700 Subject: jprobes: remove JPROBE_ENTRY() AFAICT now that jprobe.entry is a void *, JPROBE_ENTRY doesn't do anything useful - so remove it .. I've left a do-nothing version so that out-of-tree jprobes code will still compile without modifications. Signed-off-by: Michael Ellerman Cc: Prasanna S Panchamukhi Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index f4e53b71d23f..bd892850c94a 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -119,6 +119,9 @@ struct jprobe { void *entry; /* probe handling code to jump to */ }; +/* For backward compatibility with old code using JPROBE_ENTRY() */ +#define JPROBE_ENTRY(handler) (handler) + DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -- cgit v1.2.3 From 3d7e33825d8799115dd2495c9944badd3272a623 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Jul 2007 01:48:11 -0700 Subject: jprobes: make jprobes a little safer for users I realise jprobes are a razor-blades-included type of interface, but that doesn't mean we can't try and make them safer to use. This guy I know once wrote code like this: struct jprobe jp = { .kp.symbol_name = "foo", .entry = "jprobe_foo" }; And then his kernel exploded. Oops. This patch adds an arch hook, arch_deref_entry_point() (I don't like it either) which takes the void * in a struct jprobe, and gives back the text address that it represents. We can then use that in register_jprobe() to check that the entry point we're passed is actually in the kernel text, rather than just some random value. Signed-off-by: Michael Ellerman Cc: Prasanna S Panchamukhi Acked-by: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bd892850c94a..51464d12a4e5 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -214,6 +214,7 @@ int longjmp_break_handler(struct kprobe *, struct pt_regs *); int register_jprobe(struct jprobe *p); void unregister_jprobe(struct jprobe *p); void jprobe_return(void); +unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); -- cgit v1.2.3 From bdf4c48af20a3b0f01671799ace345e3d49576da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jul 2007 01:48:15 -0700 Subject: audit: rework execve audit The purpose of audit_bprm() is to log the argv array to a userspace daemon at the end of the execve system call. Since user-space hasn't had time to run, this array is still in pristine state on the process' stack; so no need to copy it, we can just grab it from there. In order to minimize the damage to audit_log_*() copy each string into a temporary kernel buffer first. Currently the audit code requires that the full argument vector fits in a single packet. So currently it does clip the argv size to a (sysctl) limit, but only when execve auditing is enabled. If the audit protocol gets extended to allow for multiple packets this check can be removed. Signed-off-by: Peter Zijlstra Signed-off-by: Ollie Wild Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e1a708337be3..a0b209cd5761 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -40,6 +40,7 @@ struct linux_binprm{ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; + unsigned long argv_len; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 -- cgit v1.2.3 From b6a2fea39318e43fee84fa7b0b90d68bed92d2ba Mon Sep 17 00:00:00 2001 From: Ollie Wild Date: Thu, 19 Jul 2007 01:48:16 -0700 Subject: mm: variable length argument support Remove the arg+env limit of MAX_ARG_PAGES by copying the strings directly from the old mm into the new mm. We create the new mm before the binfmt code runs, and place the new stack at the very top of the address space. Once the binfmt code runs and figures out where the stack should be, we move it downwards. It is a bit peculiar in that we have one task with two mm's, one of which is inactive. [a.p.zijlstra@chello.nl: limit stack size] Signed-off-by: Ollie Wild Signed-off-by: Peter Zijlstra Cc: Cc: Hugh Dickins [bunk@stusta.de: unexport bprm_mm_init] Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 18 +++++++++++++----- include/linux/mm.h | 9 ++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index a0b209cd5761..91c8c07fe8b7 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,11 +6,13 @@ struct pt_regs; /* - * MAX_ARG_PAGES defines the number of pages allocated for arguments - * and envelope for the new program. 32 should suffice, this gives - * a maximum env+arg of 128kB w/4KB pages! + * These are the maximum length and maximum number of strings passed to the + * execve() system call. MAX_ARG_STRLEN is essentially random but serves to + * prevent the kernel from being unduly impacted by misaddressed pointers. + * MAX_ARG_STRINGS is chosen to fit in a signed 32-bit integer. */ -#define MAX_ARG_PAGES 32 +#define MAX_ARG_STRLEN (PAGE_SIZE * 32) +#define MAX_ARG_STRINGS 0x7FFFFFFF /* sizeof(linux_binprm->buf) */ #define BINPRM_BUF_SIZE 128 @@ -24,7 +26,12 @@ struct pt_regs; */ struct linux_binprm{ char buf[BINPRM_BUF_SIZE]; +#ifdef CONFIG_MMU + struct vm_area_struct *vma; +#else +# define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; +#endif struct mm_struct *mm; unsigned long p; /* current top of mem */ int sh_bang; @@ -69,7 +76,7 @@ extern int register_binfmt(struct linux_binfmt *); extern int unregister_binfmt(struct linux_binfmt *); extern int prepare_binprm(struct linux_binprm *); -extern void remove_arg_zero(struct linux_binprm *); +extern int __must_check remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); @@ -86,6 +93,7 @@ extern int suid_dumpable; extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); +extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void compute_creds(struct linux_binprm *binprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); diff --git a/include/linux/mm.h b/include/linux/mm.h index 50a0ed1d1806..c456c3a1c28e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -808,7 +808,6 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); -void install_arg_page(struct vm_area_struct *, struct page *, unsigned long); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); @@ -825,9 +824,15 @@ int FASTCALL(set_page_dirty(struct page *page)); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +extern unsigned long move_page_tables(struct vm_area_struct *vma, + unsigned long old_addr, struct vm_area_struct *new_vma, + unsigned long new_addr, unsigned long len); extern unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr); +extern int mprotect_fixup(struct vm_area_struct *vma, + struct vm_area_struct **pprev, unsigned long start, + unsigned long end, unsigned long newflags); /* * A callback you can register to apply pressure to ageable caches. @@ -1159,6 +1164,8 @@ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); #ifdef CONFIG_IA64 extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #endif +extern int expand_stack_downwards(struct vm_area_struct *vma, + unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); -- cgit v1.2.3 From 16f1820028d660d9da9c03b2ae7e98253c11795b Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 19 Jul 2007 01:48:18 -0700 Subject: fs: introduce vfs_path_lookup Stackable file systems, among others, frequently need to lookup paths or path components starting from an arbitrary point in the namespace (identified by a dentry and a vfsmount). Currently, such file systems use lookup_one_len, which is frowned upon [1] as it does not pass the lookup intent along; not passing a lookup intent, for example, can trigger BUG_ON's when stacking on top of NFSv4. The first patch introduces a new lookup function to allow lookup starting from an arbitrary point in the namespace. This approach has been suggested by Christoph Hellwig [2]. The second patch changes sunrpc to use vfs_path_lookup. The third patch changes nfsctl.c to use vfs_path_lookup. The fourth patch marks link_path_walk static. The fifth, and last patch, unexports path_walk because it is no longer unnecessary to call it directly, and using the new vfs_path_lookup is cleaner. For example, the following snippet of code, looks up "some/path/component" in a directory pointed to by parent_{dentry,vfsmnt}: err = vfs_path_lookup(parent_dentry, parent_vfsmnt, "some/path/component", 0, &nd); if (!err) { /* exits */ ... /* once done, release the references */ path_release(&nd); } else if (err == -ENOENT) { /* doesn't exist */ } else { /* other error */ } VFS functions such as lookup_create can be used on the nameidata structure to pass the create intent to the file system. Signed-off-by: Josef 'Jeff' Sipek Cc: Al Viro Acked-by: Christoph Hellwig Cc: Trond Myklebust Cc: Neil Brown Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/namei.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index b7dd24917f0d..2e21af0989d9 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,6 +69,8 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struc #define user_path_walk_link(name,nd) \ __user_walk_fd(AT_FDCWD, name, 0, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); +extern int vfs_path_lookup(struct dentry *, struct vfsmount *, + const char *, unsigned int, struct nameidata *); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); -- cgit v1.2.3 From c4a7808fc3d7a346d5d12e0d69d76d66d821488b Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 19 Jul 2007 01:48:22 -0700 Subject: fs: mark link_path_walk static Signed-off-by: Josef 'Jeff' Sipek Cc: Al Viro Acked-by: Christoph Hellwig Cc: Trond Myklebust Cc: Neil Brown Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/namei.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 2e21af0989d9..18ea81265068 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -72,7 +72,6 @@ extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); extern int FASTCALL(path_walk(const char *, struct nameidata *)); -extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); -- cgit v1.2.3 From f79c20f52532d38fd0aee7ef64e138cc1613c484 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Thu, 19 Jul 2007 01:48:22 -0700 Subject: fs: remove path_walk export Signed-off-by: Josef 'Jeff' Sipek Cc: Al Viro Acked-by: Christoph Hellwig Cc: Trond Myklebust Cc: Neil Brown Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/namei.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 18ea81265068..6c38efbd810f 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -71,7 +71,6 @@ extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struc extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); -- cgit v1.2.3 From 6c5d523826dc639df709ed0f88c5d2ce25379652 Mon Sep 17 00:00:00 2001 From: "Kawai, Hidehiro" Date: Thu, 19 Jul 2007 01:48:27 -0700 Subject: coredump masking: reimplementation of dumpable using two flags This patch changes mm_struct.dumpable to a pair of bit flags. set_dumpable() converts three-value dumpable to two flags and stores it into lower two bits of mm_struct.flags instead of mm_struct.dumpable. get_dumpable() behaves in the opposite way. [akpm@linux-foundation.org: export set_dumpable] Signed-off-by: Hidehiro Kawai Cc: Alan Cox Cc: David Howells Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 731edaca8ffd..8dbd08366400 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -345,6 +345,13 @@ typedef unsigned long mm_counter_t; (mm)->hiwater_vm = (mm)->total_vm; \ } while (0) +extern void set_dumpable(struct mm_struct *mm, int value); +extern int get_dumpable(struct mm_struct *mm); + +/* mm flags */ +#define MMF_DUMPABLE 0 /* core dump is permitted */ +#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -402,7 +409,7 @@ struct mm_struct { unsigned int token_priority; unsigned int last_interval; - unsigned char dumpable:2; + unsigned long flags; /* Must use atomic bitops to access the bits */ /* coredumping support */ int core_waiters; -- cgit v1.2.3 From 3cb4a0bb1e773e3c41800b33a3f7dab32bd06c64 Mon Sep 17 00:00:00 2001 From: "Kawai, Hidehiro" Date: Thu, 19 Jul 2007 01:48:28 -0700 Subject: coredump masking: add an interface for core dump filter This patch adds an interface to set/reset flags which determines each memory segment should be dumped or not when a core file is generated. /proc//coredump_filter file is provided to access the flags. You can change the flag status for a particular process by writing to or reading from the file. The flag status is inherited to the child process when it is created. Signed-off-by: Hidehiro Kawai Cc: Alan Cox Cc: David Howells Cc: Hugh Dickins Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8dbd08366400..94f624aef017 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,8 +349,22 @@ extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); /* mm flags */ +/* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +#define MMF_DUMPABLE_BITS 2 + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 4 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ -- cgit v1.2.3 From d9664c95afe5baa92ea56eff6a1c18e7b7a2cbe7 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 19 Jul 2007 01:48:46 -0700 Subject: coda: block signals during upcall processing We ignore signals for about 30 seconds to give userspace a chance to see the upcall. As we did not block signals we ended up in a busy loop for the remainder of the period when a signal is received. Signed-off-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index b541bb3d1f4b..f28c2f7fd454 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -85,7 +85,6 @@ struct upc_req { u_short uc_opcode; /* copied from data to save lookup */ int uc_unique; wait_queue_head_t uc_sleep; /* process' wait queue */ - unsigned long uc_posttime; }; #define REQ_ASYNC 0x1 -- cgit v1.2.3 From a1b0aa87647493c0201821ab884e86298d5da7d6 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 19 Jul 2007 01:48:50 -0700 Subject: coda: remove struct coda_sb_info The sb_info structure only contains a single pointer to the character device, there is no need for the added indirection. Signed-off-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_psdev.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index f28c2f7fd454..81b2e4c7d7ce 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -8,11 +8,6 @@ struct kstatfs; -struct coda_sb_info -{ - struct venus_comm *sbi_vcomm; -}; - /* communication pending/processing queues */ struct venus_comm { u_long vc_seq; @@ -24,9 +19,9 @@ struct venus_comm { }; -static inline struct coda_sb_info *coda_sbp(struct super_block *sb) +static inline struct venus_comm *coda_vcp(struct super_block *sb) { - return ((struct coda_sb_info *)((sb)->s_fs_info)); + return (struct venus_comm *)((sb)->s_fs_info); } -- cgit v1.2.3 From 3cf01f28c303be34f18cb4f6204cf1bdfe12ba7c Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 19 Jul 2007 01:48:51 -0700 Subject: coda: remove statistics counters from /proc/fs/coda Similar information can easily be obtained with strace -c. Signed-off-by: Jan Harkes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_linux.h | 3 -- include/linux/coda_proc.h | 76 ---------------------------------------------- include/linux/coda_psdev.h | 2 -- 3 files changed, 81 deletions(-) delete mode 100644 include/linux/coda_proc.h (limited to 'include/linux') diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index e4ac016ad272..c4079b403e9e 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -43,9 +43,6 @@ int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); -/* global variables */ -extern int coda_fake_statfs; - /* this file: heloers */ static __inline__ struct CodaFid *coda_i2f(struct inode *); static __inline__ char *coda_i2s(struct inode *); diff --git a/include/linux/coda_proc.h b/include/linux/coda_proc.h deleted file mode 100644 index 0dc1b0458e75..000000000000 --- a/include/linux/coda_proc.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * coda_statis.h - * - * CODA operation statistics - * - * (c) March, 1998 - * by Michihiro Kuramochi, Zhenyu Xia and Zhanyong Wan - * zhanyong.wan@yale.edu - * - */ - -#ifndef _CODA_PROC_H -#define _CODA_PROC_H - -void coda_sysctl_init(void); -void coda_sysctl_clean(void); - -#include -#include -#include - -/* these four files are presented to show the result of the statistics: - * - * /proc/fs/coda/vfs_stats - * cache_inv_stats - * - * these four files are presented to reset the statistics to 0: - * - * /proc/sys/coda/vfs_stats - * cache_inv_stats - */ - -/* VFS operation statistics */ -struct coda_vfs_stats -{ - /* file operations */ - int open; - int flush; - int release; - int fsync; - - /* dir operations */ - int readdir; - - /* inode operations */ - int create; - int lookup; - int link; - int unlink; - int symlink; - int mkdir; - int rmdir; - int rename; - int permission; - - /* symlink operatoins*/ - int follow_link; - int readlink; -}; - -/* cache invalidation statistics */ -struct coda_cache_inv_stats -{ - int flush; - int purge_user; - int zap_dir; - int zap_file; - int zap_vnode; - int purge_fid; - int replace; -}; - -/* these global variables hold the actual statistics data */ -extern struct coda_vfs_stats coda_vfs_stat; - -#endif /* _CODA_PROC_H */ diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 81b2e4c7d7ce..aa8f454b3b77 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -69,8 +69,6 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); /* messages between coda filesystem in kernel and Venus */ -extern int coda_hard; -extern unsigned long coda_timeout; struct upc_req { struct list_head uc_chain; caddr_t uc_data; -- cgit v1.2.3 From 21f8ca3bf6198bd21e3c4cc820af2ccf753a6ec8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jul 2007 01:48:53 -0700 Subject: fix raw_spinlock_t vs lockdep Use the lockdep infrastructure to track lock contention and other lock statistics. It tracks lock contention events, and the first four unique call-sites that encountered contention. It also measures lock wait-time and hold-time in nanoseconds. The minimum and maximum times are tracked, as well as a total (which together with the number of event can give the avg). All statistics are done per lock class, per write (exclusive state) and per read (shared state). The statistics are collected per-cpu, so that the collection overhead is minimized via having no global cachemisses. This new lock statistics feature is independent of the lock dependency checking traditionally done by lockdep; it just shares the lock tracking code. It is also possible to enable both and runtime disabled either component - thereby avoiding the O(n^2) lock chain walks for instance. This patch: raw_spinlock_t should not use lockdep (and doesn't) since lockdep itself relies on it. Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spinlock_types.h | 4 ++-- include/linux/spinlock_types_up.h | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 210549ba4ef4..f6a3a951b79e 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -9,14 +9,14 @@ * Released under the General Public License (GPL). */ -#include - #if defined(CONFIG_SMP) # include #else # include #endif +#include + typedef struct { raw_spinlock_t raw_lock; #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 27644af20b7c..04135b0e198e 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -12,14 +12,10 @@ * Released under the General Public License (GPL). */ -#if defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_DEBUG_LOCK_ALLOC) +#ifdef CONFIG_DEBUG_SPINLOCK typedef struct { volatile unsigned int slock; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { 1 } @@ -34,9 +30,6 @@ typedef struct { } raw_spinlock_t; typedef struct { /* no debug version on UP */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } -- cgit v1.2.3 From f20786ff4da51e56b1956acf30be2552be266746 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jul 2007 01:48:56 -0700 Subject: lockstat: core infrastructure Introduce the core lock statistics code. Lock statistics provides lock wait-time and hold-time (as well as the count of corresponding contention and acquisitions events). Also, the first few call-sites that encounter contention are tracked. Lock wait-time is the time spent waiting on the lock. This provides insight into the locking scheme, that is, a heavily contended lock is indicative of a too coarse locking scheme. Lock hold-time is the duration the lock was held, this provides a reference for the wait-time numbers, so they can be put into perspective. 1) lock 2) ... do stuff .. unlock 3) The time between 1 and 2 is the wait-time. The time between 2 and 3 is the hold-time. The lockdep held-lock tracking code is reused, because it already collects locks into meaningful groups (classes), and because it is an existing infrastructure for lock instrumentation. Currently lockdep tracks lock acquisition with two hooks: lock() lock_acquire() _lock() ... code protected by lock ... unlock() lock_release() _unlock() We need to extend this with two more hooks, in order to measure contention. lock_contended() - used to measure contention events lock_acquired() - completion of the contention These are then placed the following way: lock() lock_acquire() if (!_try_lock()) lock_contended() _lock() lock_acquired() ... do locked stuff ... unlock() lock_release() _unlock() (Note: the try_lock() 'trick' is used to avoid instrumenting all platform dependent lock primitive implementations.) It is also possible to toggle the two lockdep features at runtime using: /proc/sys/kernel/prove_locking /proc/sys/kernel/lock_stat (esp. turning off the O(n^2) prove_locking functionaliy can help) [akpm@linux-foundation.org: build fixes] [akpm@linux-foundation.org: nuke unneeded ifdefs] Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Acked-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 14c937d345cb..8f946f614f8e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -9,6 +9,7 @@ #define __LINUX_LOCKDEP_H struct task_struct; +struct lockdep_map; #ifdef CONFIG_LOCKDEP @@ -114,8 +115,32 @@ struct lock_class { const char *name; int name_version; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[4]; +#endif +}; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; }; +struct lock_class_stats { + unsigned long contention_point[4]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + /* * Map the lock object (the lock instance) to the lock-class object. * This is embedded into specific lock instances: @@ -165,6 +190,10 @@ struct held_lock { unsigned long acquire_ip; struct lockdep_map *instance; +#ifdef CONFIG_LOCK_STAT + u64 waittime_stamp; + u64 holdtime_stamp; +#endif /* * The lock-stack is unified in that the lock chains of interrupt * contexts nest ontop of process context chains, but we 'separate' @@ -281,6 +310,30 @@ struct lock_class_key { }; #endif /* !LOCKDEP */ +#ifdef CONFIG_LOCK_STAT + +extern void lock_contended(struct lockdep_map *lock, unsigned long ip); +extern void lock_acquired(struct lockdep_map *lock); + +#define LOCK_CONTENDED(_lock, try, lock) \ +do { \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + lock(_lock); \ + lock_acquired(&(_lock)->dep_map); \ + } \ +} while (0) + +#else /* CONFIG_LOCK_STAT */ + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map) do {} while (0) + +#define LOCK_CONTENDED(_lock, try, lock) \ + lock(_lock) + +#endif /* CONFIG_LOCK_STAT */ + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) extern void early_init_irq_lock_class(void); #else -- cgit v1.2.3 From 4b32d0a4e9ec07808a5c406a416c6576c986b047 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jul 2007 01:48:59 -0700 Subject: lockdep: various fixes - update the copyright notices - use the default hash function - fix a thinko in a BUILD_BUG_ON - add a WARN_ON to spot inconsitent naming - fix a termination issue in /proc/lock_stat [akpm@linux-foundation.org: cleanups] Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8f946f614f8e..3d3386b88b6a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -1,7 +1,8 @@ /* * Runtime locking correctness validator * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * * see Documentation/lockdep-design.txt for more details. */ -- cgit v1.2.3 From 96645678cd726e87ce42a0664de71e047e32bca4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jul 2007 01:49:00 -0700 Subject: lockstat: measure lock bouncing __acquire | lock _____ | \ | __contended | | | wait | _______/ |/ | __acquired | __release | unlock We measure acquisition and contention bouncing. This is done by recording a cpu stamp in each lock instance. Contention bouncing requires the cpu stamp to be set on acquisition. Hence we move __acquired into the generic path. __acquired is then used to measure acquisition bouncing by comparing the current cpu with the old stamp before replacing it. __contended is used to measure contention bouncing (only useful for preemptable locks) [akpm@linux-foundation.org: cleanups] Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 3d3386b88b6a..0e843bf65877 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -130,12 +130,24 @@ struct lock_time { unsigned long nr; }; +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + struct lock_class_stats { unsigned long contention_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; }; struct lock_class_stats lock_stats(struct lock_class *class); @@ -150,6 +162,9 @@ struct lockdep_map { struct lock_class_key *key; struct lock_class *class_cache; const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; +#endif }; /* @@ -321,8 +336,8 @@ do { \ if (!try(_lock)) { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ - lock_acquired(&(_lock)->dep_map); \ } \ + lock_acquired(&(_lock)->dep_map); \ } while (0) #else /* CONFIG_LOCK_STAT */ -- cgit v1.2.3 From 3b5ad0797c0e4049001f961a8b58f1d0ce532072 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Jul 2007 01:49:02 -0700 Subject: stacktrace: fix header file for !CONFIG_STACKTRACE The print_stack_trace macro in stacktrace.h has a wrong number of arguments, fix it. Signed-off-by: Johannes Berg Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 1d2b084c0185..e7fa657d0c49 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -13,7 +13,7 @@ extern void save_stack_trace(struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); #else # define save_stack_trace(trace) do { } while (0) -# define print_stack_trace(trace) do { } while (0) +# define print_stack_trace(trace, spaces) do { } while (0) #endif #endif -- cgit v1.2.3 From d688abf50bd5a30d2c44dea2a72dd59052cd3cce Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Jul 2007 01:49:17 -0700 Subject: move page writeback acounting out of macros page-writeback accounting is presently performed in the page-flags macros. This is inconsistent and a bit ugly and makes it awkward to implement per-backing_dev under-writeback page accounting. So move this accounting down to the callsite(s). Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a454176c3e30..209d3a47f50f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -189,37 +189,15 @@ static inline void SetPageUptodate(struct page *page) #define __SetPagePrivate(page) __set_bit(PG_private, &(page)->flags) #define __ClearPagePrivate(page) __clear_bit(PG_private, &(page)->flags) +/* + * Only test-and-set exist for PG_writeback. The unconditional operators are + * risky: they bypass page accounting. + */ #define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) -#define SetPageWriteback(page) \ - do { \ - if (!test_and_set_bit(PG_writeback, \ - &(page)->flags)) \ - inc_zone_page_state(page, NR_WRITEBACK); \ - } while (0) -#define TestSetPageWriteback(page) \ - ({ \ - int ret; \ - ret = test_and_set_bit(PG_writeback, \ - &(page)->flags); \ - if (!ret) \ - inc_zone_page_state(page, NR_WRITEBACK); \ - ret; \ - }) -#define ClearPageWriteback(page) \ - do { \ - if (test_and_clear_bit(PG_writeback, \ - &(page)->flags)) \ - dec_zone_page_state(page, NR_WRITEBACK); \ - } while (0) -#define TestClearPageWriteback(page) \ - ({ \ - int ret; \ - ret = test_and_clear_bit(PG_writeback, \ - &(page)->flags); \ - if (ret) \ - dec_zone_page_state(page, NR_WRITEBACK); \ - ret; \ - }) +#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback, \ + &(page)->flags) +#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback, \ + &(page)->flags) #define PageBuddy(page) test_bit(PG_buddy, &(page)->flags) #define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags) -- cgit v1.2.3 From e22841c637dc8b308b40f59d64a5b6683d458ab7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 19 Jul 2007 01:49:20 -0700 Subject: knfsd: move EX_RDONLY out of header EX_RDONLY is only called in one place; just put it there. Signed-off-by: "J. Bruce Fields" Acked-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 78feb7beff75..fb4e93016666 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -116,18 +116,6 @@ struct svc_expkey { #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) -static inline int EX_RDONLY(struct svc_export *exp, struct svc_rqst *rqstp) -{ - struct exp_flavor_info *f; - struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; - - for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_flavor) - return f->flags & NFSEXP_READONLY; - } - return exp->ex_flags & NFSEXP_READONLY; -} - __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* -- cgit v1.2.3 From c7d51402d2a64c5b96531f9900bb368020ebbbbb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 19 Jul 2007 01:49:20 -0700 Subject: knfsd: clean up EX_RDONLY Share a little common code, reverse the arguments for consistency, drop the unnecessary "inline", and lowercase the name. Signed-off-by: "J. Bruce Fields" Acked-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/export.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index fb4e93016666..5cd192469096 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -116,6 +116,7 @@ struct svc_expkey { #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) +int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); /* -- cgit v1.2.3 From 07ad157f6e5d228be78acd5cea0291e5d0360398 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 19 Jul 2007 01:49:22 -0700 Subject: lguest: the guest code lguest is a simple hypervisor for Linux on Linux. Unlike kvm it doesn't need VT/SVM hardware. Unlike Xen it's simply "modprobe and go". Unlike both, it's 5000 lines and self-contained. Performance is ok, but not great (-30% on kernel compile). But given its hackability, I expect this to improve, along with the paravirt_ops code which it supplies a complete example for. There's also a 64-bit version being worked on and other craziness. But most of all, lguest is awesome fun! Too much of the kernel is a big ball of hair. lguest is simple enough to dive into and hack, plus has some warts which scream "fork me!". This patch: This is the code and headers required to make an i386 kernel an lguest guest. Signed-off-by: Rusty Russell Cc: Andi Kleen Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lguest.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/lguest_bus.h | 48 ++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 include/linux/lguest.h create mode 100644 include/linux/lguest_bus.h (limited to 'include/linux') diff --git a/include/linux/lguest.h b/include/linux/lguest.h new file mode 100644 index 000000000000..f30c04fc22b7 --- /dev/null +++ b/include/linux/lguest.h @@ -0,0 +1,85 @@ +/* Things the lguest guest needs to know. Note: like all lguest interfaces, + * this is subject to wild and random change between versions. */ +#ifndef _ASM_LGUEST_H +#define _ASM_LGUEST_H + +/* These are randomly chosen numbers which indicate we're an lguest at boot */ +#define LGUEST_MAGIC_EBP 0x4C687970 +#define LGUEST_MAGIC_EDI 0x652D4D65 +#define LGUEST_MAGIC_ESI 0xFFFFFFFF + +#ifndef __ASSEMBLY__ +#include + +#define LHCALL_FLUSH_ASYNC 0 +#define LHCALL_LGUEST_INIT 1 +#define LHCALL_CRASH 2 +#define LHCALL_LOAD_GDT 3 +#define LHCALL_NEW_PGTABLE 4 +#define LHCALL_FLUSH_TLB 5 +#define LHCALL_LOAD_IDT_ENTRY 6 +#define LHCALL_SET_STACK 7 +#define LHCALL_TS 8 +#define LHCALL_TIMER_READ 9 +#define LHCALL_HALT 10 +#define LHCALL_GET_WALLCLOCK 11 +#define LHCALL_BIND_DMA 12 +#define LHCALL_SEND_DMA 13 +#define LHCALL_SET_PTE 14 +#define LHCALL_SET_PMD 15 +#define LHCALL_LOAD_TLS 16 + +#define LGUEST_TRAP_ENTRY 0x1F + +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + : "=a"(call) + : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3) + : "memory"); + return call; +} + +void async_hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3); + +/* Can't use our min() macro here: needs to be a constant */ +#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) + +#define LHCALL_RING_SIZE 64 +struct hcall_ring +{ + u32 eax, edx, ebx, ecx; +}; + +/* All the good stuff happens here: guest registers it with LGUEST_INIT */ +struct lguest_data +{ +/* Fields which change during running: */ + /* 512 == enabled (same as eflags) */ + unsigned int irq_enabled; + /* Interrupts blocked by guest. */ + DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); + + /* Virtual address of page fault. */ + unsigned long cr2; + + /* Async hypercall ring. 0xFF == done, 0 == pending. */ + u8 hcall_status[LHCALL_RING_SIZE]; + struct hcall_ring hcalls[LHCALL_RING_SIZE]; + +/* Fields initialized by the hypervisor at boot: */ + /* Memory not to try to access */ + unsigned long reserve_mem; + /* ID of this guest (used by network driver to set ethernet address) */ + u16 guestid; + +/* Fields initialized by the guest at boot: */ + /* Instruction range to suppress interrupts even if enabled */ + unsigned long noirq_start, noirq_end; +}; +extern struct lguest_data lguest_data; +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_LGUEST_H */ diff --git a/include/linux/lguest_bus.h b/include/linux/lguest_bus.h new file mode 100644 index 000000000000..c9b4e05fee49 --- /dev/null +++ b/include/linux/lguest_bus.h @@ -0,0 +1,48 @@ +#ifndef _ASM_LGUEST_DEVICE_H +#define _ASM_LGUEST_DEVICE_H +/* Everything you need to know about lguest devices. */ +#include +#include +#include + +struct lguest_device { + /* Unique busid, and index into lguest_page->devices[] */ + unsigned int index; + + struct device dev; + + /* Driver can hang data off here. */ + void *private; +}; + +/* By convention, each device can use irq index+1 if it wants to. */ +static inline int lgdev_irq(const struct lguest_device *dev) +{ + return dev->index + 1; +} + +/* dma args must not be vmalloced! */ +void lguest_send_dma(unsigned long key, struct lguest_dma *dma); +int lguest_bind_dma(unsigned long key, struct lguest_dma *dmas, + unsigned int num, u8 irq); +void lguest_unbind_dma(unsigned long key, struct lguest_dma *dmas); + +/* Map the virtual device space */ +void *lguest_map(unsigned long phys_addr, unsigned long pages); +void lguest_unmap(void *); + +struct lguest_driver { + const char *name; + struct module *owner; + u16 device_type; + int (*probe)(struct lguest_device *dev); + void (*remove)(struct lguest_device *dev); + + struct device_driver drv; +}; + +extern int register_lguest_driver(struct lguest_driver *drv); +extern void unregister_lguest_driver(struct lguest_driver *drv); + +extern struct lguest_device_desc *lguest_devices; /* Just past max_pfn */ +#endif /* _ASM_LGUEST_DEVICE_H */ -- cgit v1.2.3 From d7e28ffe6c74416b54345d6004fd0964c115b12c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 19 Jul 2007 01:49:23 -0700 Subject: lguest: the host code This is the code for the "lg.ko" module, which allows lguest guests to be launched. [akpm@linux-foundation.org: update for futex-new-private-futexes] [akpm@linux-foundation.org: build fix] [jmorris@namei.org: lguest: use hrtimers] [akpm@linux-foundation.org: x86_64 build fix] Signed-off-by: Rusty Russell Cc: Andi Kleen Cc: Eric Dumazet Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lguest.h | 12 +++---- include/linux/lguest_launcher.h | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 include/linux/lguest_launcher.h (limited to 'include/linux') diff --git a/include/linux/lguest.h b/include/linux/lguest.h index f30c04fc22b7..500aace21ca7 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -3,11 +3,6 @@ #ifndef _ASM_LGUEST_H #define _ASM_LGUEST_H -/* These are randomly chosen numbers which indicate we're an lguest at boot */ -#define LGUEST_MAGIC_EBP 0x4C687970 -#define LGUEST_MAGIC_EDI 0x652D4D65 -#define LGUEST_MAGIC_ESI 0xFFFFFFFF - #ifndef __ASSEMBLY__ #include @@ -20,7 +15,7 @@ #define LHCALL_LOAD_IDT_ENTRY 6 #define LHCALL_SET_STACK 7 #define LHCALL_TS 8 -#define LHCALL_TIMER_READ 9 +#define LHCALL_SET_CLOCKEVENT 9 #define LHCALL_HALT 10 #define LHCALL_GET_WALLCLOCK 11 #define LHCALL_BIND_DMA 12 @@ -29,6 +24,9 @@ #define LHCALL_SET_PMD 15 #define LHCALL_LOAD_TLS 16 +#define LG_CLOCK_MIN_DELTA 100UL +#define LG_CLOCK_MAX_DELTA ULONG_MAX + #define LGUEST_TRAP_ENTRY 0x1F static inline unsigned long @@ -75,6 +73,8 @@ struct lguest_data unsigned long reserve_mem; /* ID of this guest (used by network driver to set ethernet address) */ u16 guestid; + /* KHz for the TSC clock. */ + u32 tsc_khz; /* Fields initialized by the guest at boot: */ /* Instruction range to suppress interrupts even if enabled */ diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h new file mode 100644 index 000000000000..0ba414a40c80 --- /dev/null +++ b/include/linux/lguest_launcher.h @@ -0,0 +1,73 @@ +#ifndef _ASM_LGUEST_USER +#define _ASM_LGUEST_USER +/* Everything the "lguest" userspace program needs to know. */ +/* They can register up to 32 arrays of lguest_dma. */ +#define LGUEST_MAX_DMA 32 +/* At most we can dma 16 lguest_dma in one op. */ +#define LGUEST_MAX_DMA_SECTIONS 16 + +/* How many devices? Assume each one wants up to two dma arrays per device. */ +#define LGUEST_MAX_DEVICES (LGUEST_MAX_DMA/2) + +struct lguest_dma +{ + /* 0 if free to be used, filled by hypervisor. */ + u32 used_len; + unsigned long addr[LGUEST_MAX_DMA_SECTIONS]; + u16 len[LGUEST_MAX_DMA_SECTIONS]; +}; + +struct lguest_block_page +{ + /* 0 is a read, 1 is a write. */ + int type; + u32 sector; /* Offset in device = sector * 512. */ + u32 bytes; /* Length expected to be read/written in bytes */ + /* 0 = pending, 1 = done, 2 = done, error */ + int result; + u32 num_sectors; /* Disk length = num_sectors * 512 */ +}; + +/* There is a shared page of these. */ +struct lguest_net +{ + /* Simply the mac address (with multicast bit meaning promisc). */ + unsigned char mac[6]; +}; + +/* Where the Host expects the Guest to SEND_DMA console output to. */ +#define LGUEST_CONSOLE_DMA_KEY 0 + +/* We have a page of these descriptors in the lguest_device page. */ +struct lguest_device_desc { + u16 type; +#define LGUEST_DEVICE_T_CONSOLE 1 +#define LGUEST_DEVICE_T_NET 2 +#define LGUEST_DEVICE_T_BLOCK 3 + + u16 features; +#define LGUEST_NET_F_NOCSUM 0x4000 /* Don't bother checksumming */ +#define LGUEST_DEVICE_F_RANDOMNESS 0x8000 /* IRQ is fairly random */ + + u16 status; +/* 256 and above are device specific. */ +#define LGUEST_DEVICE_S_ACKNOWLEDGE 1 /* We have seen device. */ +#define LGUEST_DEVICE_S_DRIVER 2 /* We have found a driver */ +#define LGUEST_DEVICE_S_DRIVER_OK 4 /* Driver says OK! */ +#define LGUEST_DEVICE_S_REMOVED 8 /* Device has gone away. */ +#define LGUEST_DEVICE_S_REMOVED_ACK 16 /* Driver has been told. */ +#define LGUEST_DEVICE_S_FAILED 128 /* Something actually failed */ + + u16 num_pages; + u32 pfn; +}; + +/* Write command first word is a request. */ +enum lguest_req +{ + LHREQ_INITIALIZE, /* + pfnlimit, pgdir, start, pageoffset */ + LHREQ_GETDMA, /* + addr (returns &lguest_dma, irq in ->used_len) */ + LHREQ_IRQ, /* + irq */ + LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ +}; +#endif /* _ASM_LGUEST_USER */ -- cgit v1.2.3 From c0d121720220584bba2876b032e58a076b843fa1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Jul 2007 01:49:46 -0700 Subject: drivers/edac: add new nmi rescan Provides a way for NMI reported errors on x86 to notify the EDAC subsystem pending ECC errors by writing to a software state variable. Here's the reworked patch. I added an EDAC stub to the kernel so we can have variables that are in the kernel even if EDAC is a module. I also implemented the idea of using the chip driver to select error detection mode via module parameter and eliminate the kernel compile option. Please review/test. Thx! Also, I only made changes to some of the chipset drivers since I am unfamiliar with the other ones. We can add similar changes as we go. Signed-off-by: Dave Jiang Signed-off-by: Douglas Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/edac.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/edac.h (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h new file mode 100644 index 000000000000..c8b92d79f884 --- /dev/null +++ b/include/linux/edac.h @@ -0,0 +1,29 @@ +/* + * Generic EDAC defs + * + * Author: Dave Jiang + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ +#ifndef _LINUX_EDAC_H_ +#define _LINUX_EDAC_H_ + +#include + +#define EDAC_OPSTATE_INVAL -1 +#define EDAC_OPSTATE_POLL 0 +#define EDAC_OPSTATE_NMI 1 +#define EDAC_OPSTATE_INT 2 + +extern int edac_op_state; +extern atomic_t edac_handlers; +extern atomic_t edac_err_assert; + +extern int edac_handler_set(void); +extern void edac_atomic_assert_error(void); + +#endif -- cgit v1.2.3 From 535c6a53035d8911f6b90455550c5fde0da7b866 Mon Sep 17 00:00:00 2001 From: Jason Uhlenkott Date: Thu, 19 Jul 2007 01:49:48 -0700 Subject: drivers/edac: new inte 30x0 MC driver Here's a driver for the Intel 3000 and 3010 memory controllers, relative to today's Sourceforge code drop. This has only had light testing (I've yet to actually see it handle a memory error) but it detects my hardware correctly. Signed-off-by: Jason Uhlenkott Signed-off-by: Douglas Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2c7add169539..7ec01b7525b6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2209,6 +2209,7 @@ #define PCI_DEVICE_ID_INTEL_82915GM_IG 0x2592 #define PCI_DEVICE_ID_INTEL_82945G_HB 0x2770 #define PCI_DEVICE_ID_INTEL_82945G_IG 0x2772 +#define PCI_DEVICE_ID_INTEL_3000_HB 0x2778 #define PCI_DEVICE_ID_INTEL_82945GM_HB 0x27A0 #define PCI_DEVICE_ID_INTEL_82945GM_IG 0x27A2 #define PCI_DEVICE_ID_INTEL_ICH6_0 0x2640 -- cgit v1.2.3 From 66ee2f940ac8ab25f0c43a1e717d25dc46bfe74d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Jul 2007 01:49:54 -0700 Subject: drivers/edac: mod assert_error check Change error check and clear variable from an atomic to an int Signed-off-by: Dave Jiang Signed-off-by: Douglas Thompson Signed-off-by: Linus Torvalds --- include/linux/edac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index c8b92d79f884..eab451e69a91 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -20,8 +20,8 @@ #define EDAC_OPSTATE_INT 2 extern int edac_op_state; +extern int edac_err_assert; extern atomic_t edac_handlers; -extern atomic_t edac_err_assert; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); -- cgit v1.2.3 From 53078ca84b1c01f36c306d1f52e2f88c7bb2f9e4 Mon Sep 17 00:00:00 2001 From: Douglas Thompson Date: Thu, 19 Jul 2007 01:50:17 -0700 Subject: include/linux/pci_id.h: add amd northbridge defines pci_ids.h needs two of the AMD NB device-ids namely, Addressmap and the Memory Controller devices This patch adds those to the pci_id.h include file Signed-off-by: Douglas Thompson Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7ec01b7525b6..b15c6498fe67 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -495,6 +495,8 @@ #define PCI_VENDOR_ID_AMD 0x1022 #define PCI_DEVICE_ID_AMD_K8_NB 0x1100 +#define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP 0x1101 +#define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL 0x1102 #define PCI_DEVICE_ID_AMD_K8_NB_MISC 0x1103 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From e24b8cb4fa2bb779bdf48656152366b6f52f748f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 8 Jul 2007 14:26:37 +0200 Subject: i2c: Delete the i2c-isa pseudo bus driver There are no users of i2c-isa left, so we can finally get rid of it. Signed-off-by: Jean Delvare --- include/linux/i2c-isa.h | 36 ------------------------------------ include/linux/i2c.h | 1 - 2 files changed, 37 deletions(-) delete mode 100644 include/linux/i2c-isa.h (limited to 'include/linux') diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h deleted file mode 100644 index 67e3598c4cec..000000000000 --- a/include/linux/i2c-isa.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * i2c-isa.h - definitions for the i2c-isa pseudo-i2c-adapter interface - * - * Copyright (C) 2005 Jean Delvare - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef _LINUX_I2C_ISA_H -#define _LINUX_I2C_ISA_H - -#include - -extern int i2c_isa_add_driver(struct i2c_driver *driver); -extern int i2c_isa_del_driver(struct i2c_driver *driver); - -/* Detect whether we are on the isa bus. This is only useful to hybrid - (i2c+isa) drivers. */ -#define i2c_is_isa_adapter(adapptr) \ - ((adapptr)->id == I2C_HW_ISA) -#define i2c_is_isa_client(clientptr) \ - i2c_is_isa_adapter((clientptr)->adapter) - -#endif /* _LINUX_I2C_ISA_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index cae7d618030c..47f40376a3c7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -362,7 +362,6 @@ struct i2c_client_address_data { /* The numbers to use to set I2C bus address */ #define ANY_I2C_BUS 0xffff -#define ANY_I2C_ISA_BUS 9191 /* ----- functions exported by i2c.o */ -- cgit v1.2.3 From be879c4e249a8875d7129f3b0c1bb62584dafbd8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 11 Jul 2007 18:39:02 -0400 Subject: SUNRPC: move bkl locking and xdr proc invocation into a common helper Since every invocation of xdr encode or decode functions takes the BKL now, there's a lot of redundant lock_kernel/unlock_kernel pairs that we can pull out into a common function. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 9e340fa23c06..c6b53d181bfa 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * Buffer adjustment @@ -35,6 +36,21 @@ struct xdr_netobj { */ typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); +/* + * We're still requiring the BKL in the xdr code until it's been + * more carefully audited, at which point this wrapper will become + * unnecessary. + */ +static inline int rpc_call_xdrproc(kxdrproc_t xdrproc, void *rqstp, __be32 *data, void *obj) +{ + int ret; + + lock_kernel(); + ret = xdrproc(rqstp, data, obj); + unlock_kernel(); + return ret; +} + /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 4fdc17b2a7f4d9db5b08e0f963d0027f714e4104 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Jul 2007 15:39:57 -0400 Subject: NFS: Introduce struct nfs_removeargs+nfs_removeres We need a common structure for setting up an unlink() rpc call in order to fix the asynchronous unlink code. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 38d77681cf27..7babcb16300b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -277,6 +277,21 @@ struct nfs_writeres { const struct nfs_server *server; }; +/* + * Common arguments to the unlink call + */ +struct nfs_removeargs { + const struct nfs_fh *fh; + struct qstr name; + const u32 * bitmask; +}; + +struct nfs_removeres { + const struct nfs_server *server; + struct nfs4_change_info cinfo; + struct nfs_fattr dir_attr; +}; + /* * Argument struct for decode_entry function */ @@ -631,18 +646,6 @@ struct nfs4_readlink { struct page ** pages; /* zero-copy data */ }; -struct nfs4_remove_arg { - const struct nfs_fh * fh; - const struct qstr * name; - const u32 * bitmask; -}; - -struct nfs4_remove_res { - const struct nfs_server * server; - struct nfs4_change_info cinfo; - struct nfs_fattr * dir_attr; -}; - struct nfs4_rename_arg { const struct nfs_fh * old_dir; const struct nfs_fh * new_dir; -- cgit v1.2.3 From e4eff1a622edd6ab7b73acd5d8763aa2fa3fee49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 14 Jul 2007 15:39:58 -0400 Subject: SUNRPC: Clean up the sillyrename code Fix a couple of bugs: - Don't rely on the parent dentry still being valid when the call completes. Fixes a race with shrink_dcache_for_umount_subtree() - Don't remove the file if the filehandle has been labelled as stale. Fix a couple of inefficiencies - Remove the global list of sillyrenamed files. Instead we can cache the sillyrename information in the dentry->d_fsdata - Move common code from unlink_setup/unlink_done into fs/nfs/unlink.c Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 4 ++-- include/linux/nfs_xdr.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c098ae194f79..9ba4aec37c50 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -407,8 +407,8 @@ extern void nfs_release_automount_timer(void); /* * linux/fs/nfs/unlink.c */ -extern int nfs_async_unlink(struct dentry *); -extern void nfs_complete_unlink(struct dentry *); +extern int nfs_async_unlink(struct inode *dir, struct dentry *dentry); +extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); /* * linux/fs/nfs/write.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7babcb16300b..cf74a4db84a5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -791,9 +791,8 @@ struct nfs_rpc_ops { int (*create) (struct inode *, struct dentry *, struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); - int (*unlink_setup) (struct rpc_message *, - struct dentry *, struct qstr *); - int (*unlink_done) (struct dentry *, struct rpc_task *); + void (*unlink_setup) (struct rpc_message *, struct inode *dir); + int (*unlink_done) (struct rpc_task *, struct inode *); int (*rename) (struct inode *, struct qstr *, struct inode *, struct qstr *); int (*link) (struct inode *, struct inode *, struct qstr *); -- cgit v1.2.3 From e436d80085133858bf2613a630365e8a0459fd58 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 19 Jul 2007 21:28:35 +0200 Subject: [PATCH] sched: implement cpu_clock(cpu) high-speed time source Implement the cpu_clock(cpu) interface for kernel-internal use: high-speed (but slightly incorrect) per-cpu clock constructed from sched_clock(). This API, unused at the moment, will be used in the future by blktrace, by the softlockup-watchdog, by printk and by lockstat. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 94f624aef017..33b9b4841ee7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1348,6 +1348,13 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) #endif extern unsigned long long sched_clock(void); + +/* + * For kernel-internal use: high-speed (but slightly incorrect) per-cpu + * clock constructed from sched_clock(): + */ +extern unsigned long long cpu_clock(int cpu); + extern unsigned long long task_sched_runtime(struct task_struct *task); -- cgit v1.2.3 From 626ac545c12e5f9bffe93086d1d03d26c99987ea Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 17 Jul 2007 15:28:17 -0400 Subject: user namespace: fix copy_user_ns return value When a CONFIG_USER_NS=n and a user tries to unshare some namespace other than the user namespace, the dummy copy_user_ns returns NULL rather than the old_ns. This value then gets assigned to task->nsproxy->user_ns, so that a subsequent setuid, which uses task->nsproxy->user_ns, causes a NULL pointer deref. Fix this by returning old_ns. Signed-off-by: Serge E. Hallyn Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index bb320573bb9e..1101b0ce878f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -49,7 +49,7 @@ static inline struct user_namespace *copy_user_ns(int flags, if (flags & CLONE_NEWUSER) return ERR_PTR(-EINVAL); - return NULL; + return old_ns; } static inline void put_user_ns(struct user_namespace *ns) -- cgit v1.2.3 From 342cdb6d4739cee430efc3eafcacd1605db66036 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:55 +0200 Subject: ide: make ide_get_best_pio_mode() print info if overriding PIO mode * Print info about overriding PIO mode in ide_get_best_pio_mode(). * Remove info about overriding PIO mode from cmd64{0,x} host drivers. * Remove no longer needed ide_pio_data_t.overridden field. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 19ab25804056..83a117d673c7 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1375,7 +1375,6 @@ typedef struct ide_pio_timings_s { typedef struct ide_pio_data_s { u8 pio_mode; u8 use_iordy; - u8 overridden; unsigned int cycle_time; } ide_pio_data_t; -- cgit v1.2.3 From 2229833c1365346b64357a9263fa724f74f5e376 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:55 +0200 Subject: ide: add ide_dev_has_iordy() helper (take 4) * Add ide_dev_has_iordy() helper and use it sl82c105 host driver. * Remove no longer needed ide_pio_data_t.use_iordy field. v2/v3: * Fix issues noticed by Sergei: - correct patch description - fix comment in ide_get_best_pio_mode() v4: * Fix "ata_" prefix (Noticed by Jeff). Acked-by: Sergei Shtylyov Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 83a117d673c7..349c22a1fbc5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1363,6 +1363,11 @@ extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); int ide_use_fast_pio(ide_drive_t *); +static inline int ide_dev_has_iordy(struct hd_driveid *id) +{ + return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0; +} + u8 ide_dump_status(ide_drive_t *, const char *, u8); typedef struct ide_pio_timings_s { @@ -1374,7 +1379,6 @@ typedef struct ide_pio_timings_s { typedef struct ide_pio_data_s { u8 pio_mode; - u8 use_iordy; unsigned int cycle_time; } ide_pio_data_t; -- cgit v1.2.3 From a5d8c5c834d3cabf4b7b477c3f6ee923c25026fc Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:55 +0200 Subject: ide: add ide_pci_device_t.host_flags (take 2) * Rename ide_pci_device_t.flags to ide_pci_device_t.host_flags and IDEPCI_FLAG_ISA_PORTS flag to IDE_HFLAG_ISA_PORTS. * Add IDE_HFLAG_SINGLE flag for single channel devices. * Convert core code and all IDE PCI drivers to use IDE_HFLAG_SINGLE and remove no longer needed ide_pci_device_t.channels field. v2: * Fix issues noticed by Sergei: - correct code alignment in scc_pata.c - s/IDE_HFLAG_SINGLE/~IDE_HFLAG_SINGLE/ in serverworks.c Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 349c22a1fbc5..498dc57627fa 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1244,7 +1244,8 @@ typedef struct ide_pci_enablebit_s { enum { /* Uses ISA control ports not PCI ones. */ - IDEPCI_FLAG_ISA_PORTS = (1 << 0), + IDE_HFLAG_ISA_PORTS = (1 << 0), + IDE_HFLAG_SINGLE = (1 << 1), }; typedef struct ide_pci_device_s { @@ -1256,13 +1257,12 @@ typedef struct ide_pci_device_s { void (*init_hwif)(ide_hwif_t *); void (*init_dma)(ide_hwif_t *, unsigned long); void (*fixup)(ide_hwif_t *); - u8 channels; u8 autodma; ide_pci_enablebit_t enablebits[2]; u8 bootable; unsigned int extra; struct ide_pci_device_s *next; - u8 flags; + u8 host_flags; u8 udma_mask; } ide_pci_device_t; -- cgit v1.2.3 From 7dd00083b1160b560fa2a0a486799b57baa5d035 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:56 +0200 Subject: ide: add ide_pio_cycle_time() helper (take 2) * Add ide_pio_cycle_time() helper. * Use it in ali14xx/ht6560b/qd65xx/cmd64{0,x}/sl82c105 and pmac host drivers (previously cycle time given by the device was only used for "pio" == 255). * Remove no longer needed ide_pio_data_t.cycle_time field. v2: * Fix "ata_" prefix (Noticed by Jeff). Acked-by: Sergei Shtylyov Cc: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 498dc57627fa..0afa52c14ffa 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1379,9 +1379,9 @@ typedef struct ide_pio_timings_s { typedef struct ide_pio_data_s { u8 pio_mode; - unsigned int cycle_time; } ide_pio_data_t; +unsigned int ide_pio_cycle_time(ide_drive_t *, u8); extern u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode, ide_pio_data_t *d); extern const ide_pio_timings_t ide_pio_timings[6]; -- cgit v1.2.3 From 2134758d2a5429325cee4d4ce8959af5314eeba1 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:58 +0200 Subject: ide: drop "PIO data" argument from ide_get_best_pio_mode() * Drop no longer needed "PIO data" argument from ide_get_best_pio_mode() and convert all users accordingly. * Remove no longer needed ide_pio_data_t. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 0afa52c14ffa..14a87f619d17 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1377,12 +1377,8 @@ typedef struct ide_pio_timings_s { /* active + recovery (+ setup for some chips) */ } ide_pio_timings_t; -typedef struct ide_pio_data_s { - u8 pio_mode; -} ide_pio_data_t; - unsigned int ide_pio_cycle_time(ide_drive_t *, u8); -extern u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode, ide_pio_data_t *d); +u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); extern const ide_pio_timings_t ide_pio_timings[6]; -- cgit v1.2.3 From 6a824c92db4d606c324272c4eed366fb71672440 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:58 +0200 Subject: ide: remove ide_find_best_pio_mode() * Add ->host_flags to ide_hwif_t to store ide_pci_device_t.host_flags, assign it in setup-pci.c:ide_pci_setup_ports(). * Add IDE_HFLAG_PIO_NO_{BLACKLIST,DOWNGRADE} to ide_pci_device_t.host_flags and teach ide_get_best_pio_mode() about them. Also remove needless !drive->id check while at it (drive->id is always present). * Convert amd74xx, via82cxxx and ide-timing.h to use ide_get_best_pio_mode() and then remove no longer needed ide_find_best_pio_mode(). There should be no functionality changes caused by this patch. Acked-by: Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 14a87f619d17..9f72f6e0c954 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -681,6 +681,8 @@ typedef struct hwif_s { u8 straight8; /* Alan's straight 8 check */ u8 bus_state; /* power state of the IDE bus */ + u8 host_flags; + u8 atapi_dma; /* host supports atapi_dma */ u8 ultra_mask; u8 mwdma_mask; @@ -1245,7 +1247,12 @@ typedef struct ide_pci_enablebit_s { enum { /* Uses ISA control ports not PCI ones. */ IDE_HFLAG_ISA_PORTS = (1 << 0), + /* single port device */ IDE_HFLAG_SINGLE = (1 << 1), + /* don't use legacy PIO blacklist */ + IDE_HFLAG_PIO_NO_BLACKLIST = (1 << 2), + /* don't use conservative PIO "downgrade" */ + IDE_HFLAG_PIO_NO_DOWNGRADE = (1 << 3), }; typedef struct ide_pci_device_s { -- cgit v1.2.3 From 4099d14322149c7a467e4997b87be4ba8eb78697 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 20 Jul 2007 01:11:59 +0200 Subject: ide: add PIO masks * Add ATA_PIO[0-6] defines to . * Add ->pio_mask field to ide_pci_device_t and ide_hwif_t. * Add PIO masks to host drivers. change ACK-ed by Jeff Garzik . Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ata.h | 9 +++++++++ include/linux/ide.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index b5a20162af32..23a22df039d8 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -64,6 +64,15 @@ enum { ATA_ID_PROD_LEN = 40, ATA_PCI_CTL_OFS = 2, + + ATA_PIO0 = (1 << 0), + ATA_PIO1 = ATA_PIO0 | (1 << 1), + ATA_PIO2 = ATA_PIO1 | (1 << 2), + ATA_PIO3 = ATA_PIO2 | (1 << 3), + ATA_PIO4 = ATA_PIO3 | (1 << 4), + ATA_PIO5 = ATA_PIO4 | (1 << 5), + ATA_PIO6 = ATA_PIO5 | (1 << 6), + ATA_UDMA0 = (1 << 0), ATA_UDMA1 = ATA_UDMA0 | (1 << 1), ATA_UDMA2 = ATA_UDMA1 | (1 << 2), diff --git a/include/linux/ide.h b/include/linux/ide.h index 9f72f6e0c954..5f5daad8bc54 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -683,6 +683,8 @@ typedef struct hwif_s { u8 host_flags; + u8 pio_mask; + u8 atapi_dma; /* host supports atapi_dma */ u8 ultra_mask; u8 mwdma_mask; @@ -1270,6 +1272,7 @@ typedef struct ide_pci_device_s { unsigned int extra; struct ide_pci_device_s *next; u8 host_flags; + u8 pio_mask; u8 udma_mask; } ide_pci_device_t; -- cgit v1.2.3 From 20c2df83d25c6a95affe6157a4c9cac4cf5ffaac Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 20 Jul 2007 10:11:58 +0900 Subject: mm: Remove slab destructors from kmem_cache_create(). Slab destructors were no longer supported after Christoph's c59def9f222d44bb7e2f0a559f2906191a0862d7 change. They've been BUGs for both slab and slub, and slob never supported them either. This rips out support for the dtor pointer from kmem_cache_create() completely and fixes up every single callsite in the kernel (there were about 224, not including the slab allocator definitions themselves, or the documentation references). Signed-off-by: Paul Mundt --- include/linux/i2o.h | 3 +-- include/linux/slab.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 333a370a3bdc..9752307d16ba 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -946,8 +946,7 @@ static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, strcpy(pool->name, name); pool->slab = - kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL, - NULL); + kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!pool->slab) goto free_name; diff --git a/include/linux/slab.h b/include/linux/slab.h index 0e1d0daef6a2..7d0ecc1659f0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -51,7 +51,6 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(void *, struct kmem_cache *, unsigned long), void (*)(void *, struct kmem_cache *, unsigned long)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -70,7 +69,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); */ #define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL, NULL) + (__flags), NULL) /* * The largest kmalloc size supported by the slab allocators is -- cgit v1.2.3 From 76c1ce7870fd9b05431da1bbd47fdafcc029a25b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 1 May 2007 16:19:07 +1000 Subject: Split out common parts of prom.h This creates linux/of.h and includes asm/prom.h from it. We also include linux/of.h from asm/prom.h while we transition. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: David S. Miller --- include/linux/of.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/linux/of.h (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h new file mode 100644 index 000000000000..47734ffd9745 --- /dev/null +++ b/include/linux/of.h @@ -0,0 +1,61 @@ +#ifndef _LINUX_OF_H +#define _LINUX_OF_H +/* + * Definitions for talking to the Open Firmware PROM on + * Power Macintosh and other computers. + * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. + * Updates for SPARC64 by David S. Miller + * Derived from PowerPC and Sparc prom.h files by Stephen Rothwell, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#include +#include + +/* flag descriptions */ +#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ +#define OF_DETACHED 2 /* node has been detached from the device tree */ + +#define OF_BAD_ADDR ((u64)-1) + +extern struct device_node *of_find_node_by_name(struct device_node *from, + const char *name); +#define for_each_node_by_name(dn, name) \ + for (dn = of_find_node_by_name(NULL, name); dn; \ + dn = of_find_node_by_name(dn, name)) +extern struct device_node *of_find_node_by_type(struct device_node *from, + const char *type); +#define for_each_node_by_type(dn, type) \ + for (dn = of_find_node_by_type(NULL, type); dn; \ + dn = of_find_node_by_type(dn, type)) +extern struct device_node *of_find_compatible_node(struct device_node *from, + const char *type, const char *compat); +#define for_each_compatible_node(dn, type, compatible) \ + for (dn = of_find_compatible_node(NULL, type, compatible); dn; \ + dn = of_find_compatible_node(dn, type, compatible)) +extern struct device_node *of_find_node_by_path(const char *path); +extern struct device_node *of_find_node_by_phandle(phandle handle); +extern struct device_node *of_get_parent(const struct device_node *node); +extern struct device_node *of_get_next_child(const struct device_node *node, + struct device_node *prev); +extern struct property *of_find_property(const struct device_node *np, + const char *name, + int *lenp); +extern int of_device_is_compatible(const struct device_node *device, + const char *); +extern const void *of_get_property(const struct device_node *node, + const char *name, + int *lenp); +#define get_property(a, b, c) of_get_property((a), (b), (c)) +extern int of_n_addr_cells(struct device_node *np); +extern int of_n_size_cells(struct device_node *np); + +#endif /* _LINUX_OF_H */ -- cgit v1.2.3 From f898f8dbcec4848cddb8c5be2d0affd75779ebe2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 1 May 2007 16:49:51 +1000 Subject: Begin consolidation of of_device.h This just moves the common stuff from the arch of_device.h files to linux/of_device.h. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: David S. Miller --- include/linux/of_device.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/linux/of_device.h (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h new file mode 100644 index 000000000000..91bf84b9d144 --- /dev/null +++ b/include/linux/of_device.h @@ -0,0 +1,26 @@ +#ifndef _LINUX_OF_DEVICE_H +#define _LINUX_OF_DEVICE_H +#ifdef __KERNEL__ + +#include +#include +#include + +#include + +#define to_of_device(d) container_of(d, struct of_device, dev) + +extern const struct of_device_id *of_match_node( + const struct of_device_id *matches, const struct device_node *node); +extern const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct of_device *dev); + +extern struct of_device *of_dev_get(struct of_device *dev); +extern void of_dev_put(struct of_device *dev); + +extern int of_device_register(struct of_device *ofdev); +extern void of_device_unregister(struct of_device *ofdev); +extern void of_release_dev(struct device *dev); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From b41912ca345e6de8ec8469d57cd585881271e2b9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 1 May 2007 16:12:57 +1000 Subject: Create linux/of_platorm.h Move common stuff from asm-powerpc/of_platform.h to here and move the common bits from asm-sparc*/of_device.h here as well. Create asm-sparc*/of_platform.h and move appropriate parts of of_device.h to them. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: David S. Miller --- include/linux/of_platform.h | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 include/linux/of_platform.h (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h new file mode 100644 index 000000000000..c85d0f835783 --- /dev/null +++ b/include/linux/of_platform.h @@ -0,0 +1,55 @@ +#ifndef _LINUX_OF_PLATFORM_H +#define _LINUX_OF_PLATFORM_H +/* + * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp. + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include + +/* + * The of_platform_bus_type is a bus type used by drivers that do not + * attach to a macio or similar bus but still use OF probing + * mechanism + */ +extern struct bus_type of_platform_bus_type; + +/* + * An of_platform_driver driver is attached to a basic of_device on + * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS + * busses on sparc). + */ +struct of_platform_driver +{ + char *name; + struct of_device_id *match_table; + struct module *owner; + + int (*probe)(struct of_device* dev, + const struct of_device_id *match); + int (*remove)(struct of_device* dev); + + int (*suspend)(struct of_device* dev, pm_message_t state); + int (*resume)(struct of_device* dev); + int (*shutdown)(struct of_device* dev); + + struct device_driver driver; +}; +#define to_of_platform_driver(drv) \ + container_of(drv,struct of_platform_driver, driver) + +#include + +extern struct of_device *of_find_device_by_node(struct device_node *np); + +#endif /* _LINUX_OF_PLATFORM_H */ -- cgit v1.2.3 From 3f23de10f283819bcdc0d2282e8b5b14c2e96d3b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 3 May 2007 02:38:57 +1000 Subject: Create drivers/of/platform.c and populate it with the common parts from PowerPC and Sparc[64]. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: David S. Miller --- include/linux/of_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index c85d0f835783..5fd44e63fb26 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -52,4 +52,6 @@ struct of_platform_driver extern struct of_device *of_find_device_by_node(struct device_node *np); +extern int of_bus_type_init(struct bus_type *bus, const char *name); + #endif /* _LINUX_OF_PLATFORM_H */ -- cgit v1.2.3 From c2dea2d1fdbce86942dba0a968c523d8b7858bb5 Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Fri, 20 Jul 2007 10:06:38 +0200 Subject: cfq: async queue allocation per priority If we have two processes with different ioprio_class, but the same ioprio_data, their async requests will fall into the same queue. I guess such behavior is not expected, because it's not right to put real-time requests and best-effort requests in the same queue. The attached patch fixes the problem by introducing additional *cfqq fields on cfqd, pointing to per-(class,priority) async queues. Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 2eaa142cd061..baf29387cab4 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -53,6 +53,14 @@ static inline int task_ioprio(struct task_struct *task) return IOPRIO_NORM; } +static inline int task_ioprio_class(struct task_struct *task) +{ + if (ioprio_valid(task->ioprio)) + return IOPRIO_PRIO_CLASS(task->ioprio); + + return IOPRIO_CLASS_BE; +} + static inline int task_nice_ioprio(struct task_struct *task) { return (task_nice(task) + 20) / 5; -- cgit v1.2.3 From 7a05f067c0da139613cbe74583bb7d208a5f87b9 Mon Sep 17 00:00:00 2001 From: Liam Girdwood Date: Mon, 14 May 2007 11:05:09 +0200 Subject: [ALSA] ASoC S3C24xx machine drivers - I2C ID for LM4857 This patch adds I2C ID for the LM4857 audio amp and corrects the spacing of the WM8731, WM8750 and WM8753 ID's. Signed-off-by: Liam Girdwood Signed-off-by: Takashi Iwai Signed-off-by: Jaroslav Kysela --- include/linux/i2c-id.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index aa83d4163096..b69014865714 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -115,9 +115,10 @@ #define I2C_DRIVERID_KS0127 86 /* Samsung ks0127 video decoder */ #define I2C_DRIVERID_TLV320AIC23B 87 /* TI TLV320AIC23B audio codec */ #define I2C_DRIVERID_ISL1208 88 /* Intersil ISL1208 RTC */ -#define I2C_DRIVERID_WM8731 89 /* Wolfson WM8731 audio codec */ -#define I2C_DRIVERID_WM8750 90 /* Wolfson WM8750 audio codec */ -#define I2C_DRIVERID_WM8753 91 /* Wolfson WM8753 audio codec */ +#define I2C_DRIVERID_WM8731 89 /* Wolfson WM8731 audio codec */ +#define I2C_DRIVERID_WM8750 90 /* Wolfson WM8750 audio codec */ +#define I2C_DRIVERID_WM8753 91 /* Wolfson WM8753 audio codec */ +#define I2C_DRIVERID_LM4857 92 /* LM4857 Audio Amplifier */ #define I2C_DRIVERID_I2CDEV 900 #define I2C_DRIVERID_ARP 902 /* SMBus ARP Client */ -- cgit v1.2.3 From 9977126c4b65c1396b665f7a0eeb8c7dede336f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:38 +0900 Subject: libata: add @is_cmd to ata_tf_to_fis() Add @is_cmd to ata_tf_to_fis(). This controls bit 7 of the second byte which tells the device whether this H2D FIS is for a command or not. This cleans up ahci a bit and will be used by PMP. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 47cd2a1c5544..5d3df6cde272 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -764,7 +764,8 @@ extern unsigned int ata_dev_try_classify(struct ata_port *, unsigned int, u8 *); */ extern void ata_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf); -extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 *fis, u8 pmp); +extern void ata_tf_to_fis(const struct ata_taskfile *tf, + u8 pmp, int is_cmd, u8 *fis); extern void ata_tf_from_fis(const u8 *fis, struct ata_taskfile *tf); extern void ata_noop_dev_select (struct ata_port *ap, unsigned int device); extern void ata_std_dev_select (struct ata_port *ap, unsigned int device); -- cgit v1.2.3 From b64bbc39f2122a2276578e40144af69ef01decd4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:39 +0900 Subject: libata: improve EH report formatting Requiring LLDs to format multiple error description messages properly doesn't work too well. Help LLDs a bit by making ata_ehi_push_desc() insert ", " on each invocation. __ata_ehi_push_desc() is the raw version without the automatic separator. While at it, make ehi_desc interface proper functions instead of macros. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 5d3df6cde272..94b37d180680 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -910,16 +910,9 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, /* * ata_eh_info helpers */ -#define ata_ehi_push_desc(ehi, fmt, args...) do { \ - (ehi)->desc_len += scnprintf((ehi)->desc + (ehi)->desc_len, \ - ATA_EH_DESC_LEN - (ehi)->desc_len, \ - fmt , ##args); \ -} while (0) - -#define ata_ehi_clear_desc(ehi) do { \ - (ehi)->desc[0] = '\0'; \ - (ehi)->desc_len = 0; \ -} while (0) +extern void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); +extern void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); +extern void ata_ehi_clear_desc(struct ata_eh_info *ehi); static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi) { -- cgit v1.2.3 From 5335b729064e03319cd2d5219770451dbb1d7f67 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:40 +0900 Subject: libata: implement AC_ERR_NCQ When an NCQ command fails, all commands in flight are aborted and the offending one is reported using log page 10h. Depending on controller characteristics and LLD implementation, all commands may appear as having a device error due to shared TF status making it hard to determine what's actually going on. This patch adds AC_ERR_NCQ, marks the command reported by log page 10h with it and print extra "" after the error report for the command to help distinguishing the offending command. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 94b37d180680..cb181713d9b5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -323,6 +323,7 @@ enum ata_completion_errors { AC_ERR_INVALID = (1 << 7), /* invalid argument */ AC_ERR_OTHER = (1 << 8), /* unknown */ AC_ERR_NODEV_HINT = (1 << 9), /* polling device detection hint */ + AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ }; /* forward declarations */ -- cgit v1.2.3 From da3dbb17a0e9a9ec7f5aed95f1fddadb790edc9d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:40 +0900 Subject: libata: make ->scr_read/write callbacks return error code Convert ->scr_read/write callbacks to return error code to better indicate failure. This will help handling of SCR_NOTIFICATION. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index cb181713d9b5..c732b3e78e28 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -620,9 +620,8 @@ struct ata_port_operations { u8 (*irq_on) (struct ata_port *); u8 (*irq_ack) (struct ata_port *ap, unsigned int chk_drq); - u32 (*scr_read) (struct ata_port *ap, unsigned int sc_reg); - void (*scr_write) (struct ata_port *ap, unsigned int sc_reg, - u32 val); + int (*scr_read) (struct ata_port *ap, unsigned int sc_reg, u32 *val); + int (*scr_write) (struct ata_port *ap, unsigned int sc_reg, u32 val); int (*port_suspend) (struct ata_port *ap, pm_message_t mesg); int (*port_resume) (struct ata_port *ap); -- cgit v1.2.3 From 008a78961ec72990d09d7625ef9499d7317d040d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:40 +0900 Subject: libata: improve SATA PHY speed down logic sata_down_spd_limit() first reads the current SPD from SStatus and limit the speed to the lower one of one below the current limit or one below the current SPD in SStatus. SPD may not be accessible or valid when SPD down is requested making sata_down_spd_limit() fail when it's most needed. This patch makes the current SPD cached after each successful reset and forces GEN I speed (1.5Gbps) if neither of SStatus or the cached value is valid, so sata_down_spd_limit() is now guaranteed to lower the speed limit if lower speed is available. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c732b3e78e28..16ebdf152c75 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -531,6 +531,7 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; /* SATA PHY speed limit */ + unsigned int sata_spd; /* current SATA PHY speed */ /* record runtime error info, protected by host lock */ struct ata_eh_info eh_info; -- cgit v1.2.3 From f8f1e1cc0cd4d75c73e9a55a0ede8958e4fa14f1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:40 +0900 Subject: libata: reorganize ata_ehi_hotplugged() __ata_ehi_hotplugged() now has no users. Regorganize ata_ehi_hotplugged() such that a new function ata_ehi_schedule_probe() deals with scheduling probing. ata_ehi_hotplugged() calls it and additionally marks hotplug specific flags. ata_ehi_schedule_probe() will be used laster. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 16ebdf152c75..74800ad6d81f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -915,16 +915,17 @@ extern void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); extern void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...); extern void ata_ehi_clear_desc(struct ata_eh_info *ehi); -static inline void __ata_ehi_hotplugged(struct ata_eh_info *ehi) +static inline void ata_ehi_schedule_probe(struct ata_eh_info *ehi) { - ehi->flags |= ATA_EHI_HOTPLUGGED | ATA_EHI_RESUME_LINK; + ehi->flags |= ATA_EHI_RESUME_LINK; ehi->action |= ATA_EH_SOFTRESET; ehi->probe_mask |= (1 << ATA_MAX_DEVICES) - 1; } static inline void ata_ehi_hotplugged(struct ata_eh_info *ehi) { - __ata_ehi_hotplugged(ehi); + ata_ehi_schedule_probe(ehi); + ehi->flags |= ATA_EHI_HOTPLUGGED; ehi->err_mask |= AC_ERR_ATA_BUS; } -- cgit v1.2.3 From 5ddf24c5ea9d715dc4f5d5d5dd1c9337d90466dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jul 2007 14:29:41 +0900 Subject: libata: implement EH fast drain In most cases, when EH is scheduled, all in-flight commands are aborted causing EH to kick in immediately. However, in some cases (especially with PMP), it's unclear which commands are affected by the error condition and although aborting all in-flight commands work, it isn't optimal and may cause unnecessary disruption. On the other hand, waiting for in-flight commands to drain themselves can take up to 30seconds. This patch implements EH fast drain to handle such situations. It gives in-flight commands some time to finish up but doesn't wait for too long. After EH is scheduled, fast drain timer is started and if no other completion occurs in ATA_EH_FASTDRAIN_INTERVAL all in-flight commands are aborted. If any completion occurred in the interval, the port is given another interval to finish up itself. Currently ATA_EH_FASTDRAIN_INTERVAL is 3 secs which should be enough for finishing up most commands. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 74800ad6d81f..be5a43928c84 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -565,6 +565,9 @@ struct ata_port { pm_message_t pm_mesg; int *pm_result; + struct timer_list fastdrain_timer; + unsigned long fastdrain_cnt; + void *private_data; #ifdef CONFIG_ATA_ACPI -- cgit v1.2.3 From d046943cbaf332f75284ad99f4b3e60bae7ffff2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Jul 2007 16:18:06 +0100 Subject: fix gfp_t annotations for slub Since we have use like ~SLUB_DMA, we ought to have the type set right in both cases. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 07f7e4cbcee3..124270df8734 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -160,7 +160,7 @@ static inline struct kmem_cache *kmalloc_slab(size_t size) #define SLUB_DMA __GFP_DMA #else /* Disable DMA functionality */ -#define SLUB_DMA 0 +#define SLUB_DMA (__force gfp_t)0 #endif void *kmem_cache_alloc(struct kmem_cache *, gfp_t); -- cgit v1.2.3 From eb0645a8b1f14da300f40bb9f424640cd1181fbf Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 Jul 2007 00:31:46 -0700 Subject: async_tx: fix kmap_atomic usage in async_memcpy Andrew Morton: [async_memcpy] is very wrong if both ASYNC_TX_KMAP_DST and ASYNC_TX_KMAP_SRC can ever be set. We'll end up using the same kmap slot for both src add dest and we get either corrupted data or a BUG. Evgeniy Polyakov: Btw, shouldn't it always be kmap_atomic() even if flag is not set. That pages are usual one returned by alloc_page(). So fix the usage of kmap_atomic and kill the ASYNC_TX_KMAP_DST and ASYNC_TX_KMAP_SRC flags. Cc: Andrew Morton Cc: Evgeniy Polyakov Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/async_tx.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index ff1255079fa1..bdca3f1b3213 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -51,10 +51,6 @@ struct dma_chan_ref { * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. - * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously - * take an atomic mapping (KM_USER0) on the source page(s) - * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously - * take an atomic mapping (KM_USER0) on the dest page(s) */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), @@ -62,8 +58,6 @@ enum async_tx_flags { ASYNC_TX_ASSUME_COHERENT = (1 << 2), ASYNC_TX_ACK = (1 << 3), ASYNC_TX_DEP_ACK = (1 << 4), - ASYNC_TX_KMAP_SRC = (1 << 5), - ASYNC_TX_KMAP_DST = (1 << 6), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0aa366f351d044703e25c8425e508170e80d83b1 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 20 Jul 2007 11:22:30 -0700 Subject: [IA64] Convert to generic timekeeping/clocksource This is a merge of Peter Keilty's initial patch (which was revived by Bob Picco) for this with Hidetoshi Seto's fixes and scaling improvements. Acked-by: Bob Picco Signed-off-by: Tony Luck --- include/linux/clocksource.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index bf297b03a4e4..16ea3374dddf 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -67,6 +67,12 @@ struct clocksource { unsigned long flags; cycle_t (*vread)(void); void (*resume)(void); +#ifdef CONFIG_IA64 + void *fsys_mmio; /* used by fsyscall asm code */ +#define CLKSRC_FSYS_MMIO_SET(mmio, addr) ((mmio) = (addr)) +#else +#define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) +#endif /* timekeeping specific data, ignore */ cycle_t cycle_interval; -- cgit v1.2.3 From 1f564ad6d4182859612cbae452122e5eb2d62a76 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Wed, 18 Jul 2007 15:51:28 -0700 Subject: [IA64] remove time interpolator Remove time_interpolator code (This is generic code, but only user was ia64. It has been superseded by the CONFIG_GENERIC_TIME code). Signed-off-by: Bob Picco Signed-off-by: John Stultz Signed-off-by: Peter Keilty Signed-off-by: Tony Luck --- include/linux/timex.h | 60 --------------------------------------------------- 1 file changed, 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index da929dbbea2a..37ac3ff90faf 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -224,66 +224,6 @@ static inline int ntp_synced(void) __x < 0 ? -(-__x >> __s) : __x >> __s; \ }) - -#ifdef CONFIG_TIME_INTERPOLATION - -#define TIME_SOURCE_CPU 0 -#define TIME_SOURCE_MMIO64 1 -#define TIME_SOURCE_MMIO32 2 -#define TIME_SOURCE_FUNCTION 3 - -/* For proper operations time_interpolator clocks must run slightly slower - * than the standard clock since the interpolator may only correct by having - * time jump forward during a tick. A slower clock is usually a side effect - * of the integer divide of the nanoseconds in a second by the frequency. - * The accuracy of the division can be increased by specifying a shift. - * However, this may cause the clock not to be slow enough. - * The interpolator will self-tune the clock by slowing down if no - * resets occur or speeding up if the time jumps per analysis cycle - * become too high. - * - * Setting jitter compensates for a fluctuating timesource by comparing - * to the last value read from the timesource to insure that an earlier value - * is not returned by a later call. The price to pay - * for the compensation is that the timer routines are not as scalable anymore. - */ - -struct time_interpolator { - u16 source; /* time source flags */ - u8 shift; /* increases accuracy of multiply by shifting. */ - /* Note that bits may be lost if shift is set too high */ - u8 jitter; /* if set compensate for fluctuations */ - u32 nsec_per_cyc; /* set by register_time_interpolator() */ - void *addr; /* address of counter or function */ - cycles_t mask; /* mask the valid bits of the counter */ - unsigned long offset; /* nsec offset at last update of interpolator */ - u64 last_counter; /* counter value in units of the counter at last update */ - cycles_t last_cycle; /* Last timer value if TIME_SOURCE_JITTER is set */ - u64 frequency; /* frequency in counts/second */ - long drift; /* drift in parts-per-million (or -1) */ - unsigned long skips; /* skips forward */ - unsigned long ns_skipped; /* nanoseconds skipped */ - struct time_interpolator *next; -}; - -extern void register_time_interpolator(struct time_interpolator *); -extern void unregister_time_interpolator(struct time_interpolator *); -extern void time_interpolator_reset(void); -extern unsigned long time_interpolator_get_offset(void); -extern void time_interpolator_update(long delta_nsec); - -#else /* !CONFIG_TIME_INTERPOLATION */ - -static inline void time_interpolator_reset(void) -{ -} - -static inline void time_interpolator_update(long delta_nsec) -{ -} - -#endif /* !CONFIG_TIME_INTERPOLATION */ - #define TICK_LENGTH_SHIFT 32 #ifdef CONFIG_NO_HZ -- cgit v1.2.3 From 1d4ec7b1d6f130818f9b62dea3411d9ee2ff6ff6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 20 Jul 2007 12:13:20 -0700 Subject: Fix ZERO_OR_NULL_PTR(ZERO_SIZE_PTR) The comparison with ZERO_SIZE_PTR in ZERO_OR_NULL_PTR() needs to be <= (not just <) so that ZERO_OR_NULL_PTR(ZERO_SIZE_PTR) is 1. Signed-off-by: Roland Dreier [ Duh! - Linus ] Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7d0ecc1659f0..d859354b9e51 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -40,7 +40,7 @@ */ #define ZERO_SIZE_PTR ((void *)16) -#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) < \ +#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ (unsigned long)ZERO_SIZE_PTR) /* -- cgit v1.2.3 From 8e68e2f248332a9c3fd4f08258f488c209bd3e0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jul 2007 21:39:47 +0200 Subject: [CELL] spufs: extension of spu_create to support affinity definition This patch adds support for additional flags at spu_create, which relate to the establishment of affinity between contexts and contexts to memory. A fourth, optional, parameter is supported. This parameter represent a affinity neighbor of the context being created, and is used when defining SPU-SPU affinity. Affinity is represented as a doubly linked list of spu_contexts. Signed-off-by: Andre Detsch Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7a8b1e3322e0..61def7c8fbb3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd); asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, mode_t mode); + unsigned int flags, mode_t mode, int fd); asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode, unsigned dev); -- cgit v1.2.3 From 1474855d0878cced6f39f51f3c2bd7428b44cb1e Mon Sep 17 00:00:00 2001 From: Bob Nelson Date: Fri, 20 Jul 2007 21:39:53 +0200 Subject: [CELL] oprofile: add support to OProfile for profiling CELL BE SPUs From: Maynard Johnson This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Exports spu_set_profile_private_kref and spu_get_profile_private_kref which are used by OProfile to store private profile information in spufs data structures. Also incorporated several fixes from other patches (rrn). Check pointer returned from kzalloc. Eliminated unnecessary cast. Better error handling and cleanup in the related area. 64-bit unsigned long parameter was being demoted to 32-bit unsigned int and eventually promoted back to unsigned long. Signed-off-by: Carl Love Signed-off-by: Maynard Johnson Signed-off-by: Bob Nelson Signed-off-by: Arnd Bergmann Acked-by: Paul Mackerras --- include/linux/dcookies.h | 1 + include/linux/elf-em.h | 3 ++- include/linux/oprofile.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcookies.h b/include/linux/dcookies.h index 0fe7cdf326f7..98c69ab80c84 100644 --- a/include/linux/dcookies.h +++ b/include/linux/dcookies.h @@ -12,6 +12,7 @@ #ifdef CONFIG_PROFILING +#include #include struct dcookie_user; diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 0311bad838b1..5834e843a946 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h @@ -20,7 +20,8 @@ #define EM_PARISC 15 /* HPPA */ #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ #define EM_PPC 20 /* PowerPC */ -#define EM_PPC64 21 /* PowerPC64 */ +#define EM_PPC64 21 /* PowerPC64 */ +#define EM_SPU 23 /* Cell BE SPU */ #define EM_SH 42 /* SuperH */ #define EM_SPARCV9 43 /* SPARC v9 64-bit */ #define EM_IA_64 50 /* HP/Intel IA-64 */ diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 0d514b252454..041bb31100f4 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -17,6 +17,26 @@ #include #include +/* Each escaped entry is prefixed by ESCAPE_CODE + * then one of the following codes, then the + * relevant data. + * These #defines live in this file so that arch-specific + * buffer sync'ing code can access them. + */ +#define ESCAPE_CODE ~0UL +#define CTX_SWITCH_CODE 1 +#define CPU_SWITCH_CODE 2 +#define COOKIE_SWITCH_CODE 3 +#define KERNEL_ENTER_SWITCH_CODE 4 +#define KERNEL_EXIT_SWITCH_CODE 5 +#define MODULE_LOADED_CODE 6 +#define CTX_TGID_CODE 7 +#define TRACE_BEGIN_CODE 8 +#define TRACE_END_CODE 9 +#define XEN_ENTER_SWITCH_CODE 10 +#define SPU_PROFILING_CODE 11 +#define SPU_CTX_SWITCH_CODE 12 + struct super_block; struct dentry; struct file_operations; @@ -35,6 +55,14 @@ struct oprofile_operations { int (*start)(void); /* Stop delivering interrupts. */ void (*stop)(void); + /* Arch-specific buffer sync functions. + * Return value = 0: Success + * Return value = -1: Failure + * Return value = 1: Run generic sync function + */ + int (*sync_start)(void); + int (*sync_stop)(void); + /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); /* CPU identification string. */ @@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops); */ void oprofile_arch_exit(void); +/** + * Add data to the event buffer. + * The data passed is free-form, but typically consists of + * file offsets, dcookies, context information, and ESCAPE codes. + */ +void add_event_entry(unsigned long data); + /** * Add a sample. This may be called from any context. Pass * smp_processor_id() as cpu. -- cgit v1.2.3 From 3d58ffe2aa107df6db57f875dba5368960b17cde Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 18 Jul 2007 18:41:08 -0300 Subject: V4L/DVB (5867): videodev2.h: add missing for userspace When videodev2.h is included by an application, it needs to include for the timeval struct. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index d16a2b57dc81..c66c8a3410b9 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -60,6 +60,7 @@ #include /* need __user */ #else #define __user +#include #endif #include -- cgit v1.2.3 From 31ce72a6b1c7635259cf522459539c0611f2c50c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 20 Jul 2007 19:45:45 -0700 Subject: [NET]: Fix loopback crashes when multiqueue is enabled. From: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9820ca1e45e2..4a616d73cc25 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -575,7 +575,7 @@ struct net_device /* The TX queue control structures */ unsigned int egress_subqueue_count; - struct net_device_subqueue egress_subqueue[0]; + struct net_device_subqueue egress_subqueue[1]; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 39dca558a5b52b63e49bc234a7e887be092aa690 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 20 Jul 2007 18:22:17 -0500 Subject: [SCSI] bsg: make class backlinks Currently, bsg doesn't make class backlinks (a process whereby you'd get a link to bsg in the device directory in the same way you get one for sg). This is because the bsg device is uninitialised, so the class device has nothing it can attach to. The fix is to make the bsg device point to the cdevice of the entity creating the bsg, necessitating changing the bsg_register_queue() prototype into a form that takes the generic device. Acked-by: FUJITA Tomonori Signed-off-by: James Bottomley --- include/linux/bsg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 8547b10c388b..f415f89e0ac8 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -57,10 +57,10 @@ struct bsg_class_device { struct request_queue *queue; }; -extern int bsg_register_queue(struct request_queue *, const char *); +extern int bsg_register_queue(struct request_queue *, struct device *, const char *); extern void bsg_unregister_queue(struct request_queue *); #else -#define bsg_register_queue(disk, name) (0) +#define bsg_register_queue(disk, dev, name) (0) #define bsg_unregister_queue(disk) do { } while (0) #endif -- cgit v1.2.3 From d3fec424b23c47686efcf3f2004c3f1c1cee4d9c Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Sat, 21 Jul 2007 04:37:26 -0700 Subject: coda: remove CODA_STORE/CODA_RELEASE upcalls This is an variation on the patch sent by Christoph Hellwig which kills file_count abuse by the Coda kernel module by moving the coda_flush functionality into coda_release. However part of reason we were using the coda_flush callback was to allow Coda to pass errors that occur during writeback from the userspace cache manager back to close(). As Al Viro explained on linux-fsdevel, it is impossible to guarantee that such errors can in fact be returned back to the caller. There are many cases where the last reference to a file is not released by the close system call and it is also impossible to pick some close as a 'last-close' and delay it until all other references have been destroyed. The CODA_STORE/CODA_RELEASE upcall combination is clearly a broken design, and it is better to remove it completely. Signed-off-by: Jan Harkes Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coda_linux.h | 1 - include/linux/coda_psdev.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index c4079b403e9e..1c47a34aa794 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -36,7 +36,6 @@ extern const struct file_operations coda_ioctl_operations; /* operations shared over more than one file */ int coda_open(struct inode *i, struct file *f); -int coda_flush(struct file *f, fl_owner_t id); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask, struct nameidata *nd); int coda_revalidate_inode(struct dentry *); diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index aa8f454b3b77..07ae8f846055 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -33,9 +33,6 @@ int venus_setattr(struct super_block *, struct CodaFid *, struct coda_vattr *); int venus_lookup(struct super_block *sb, struct CodaFid *fid, const char *name, int length, int *type, struct CodaFid *resfid); -int venus_store(struct super_block *sb, struct CodaFid *fid, int flags, - vuid_t uid); -int venus_release(struct super_block *sb, struct CodaFid *fid, int flags); int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, vuid_t uid); int venus_open(struct super_block *sb, struct CodaFid *fid, int flags, -- cgit v1.2.3 From 93da56efcf8c6a111f0349f6b7651172d4745ca0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:33 -0700 Subject: clockevents: remove prototypes of removed functions Signed-off-by: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clockchips.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8486e78f7335..8d7a39019ace 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -119,10 +119,6 @@ extern void clockevents_register_device(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); -extern -struct clock_event_device *clockevents_request_device(unsigned int features, - cpumask_t cpumask); -extern void clockevents_release_device(struct clock_event_device *dev); extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); extern int clockevents_register_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 18de5bc4c1f1f1fa5e14f354a7603bd6e9d4e3b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:34 -0700 Subject: clockevents: fix resume logic We need to make sure, that the clockevent devices are resumed, before the tick is resumed. The current resume logic does not guarantee this. Add CLOCK_EVT_MODE_RESUME and call the set mode functions of the clock event devices before resuming the tick / oneshot functionality. Fixup the existing users. Thanks to Nigel Cunningham for tracking down a long standing thinko, which affected the jinxed VAIO. [akpm@linux-foundation.org: xen build fix] Signed-off-by: Thomas Gleixner Cc: john stultz Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clockchips.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8d7a39019ace..e0bd46eb2414 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -23,6 +23,7 @@ enum clock_event_mode { CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, + CLOCK_EVT_MODE_RESUME, }; /* Clock event notification values */ -- cgit v1.2.3 From 82644459c592a28a3eab682f9b88d81019ddfe8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:37 -0700 Subject: NTP: move the cmos update code into ntp.c i386 and sparc64 have the identical code to update the cmos clock. Move it into kernel/time/ntp.c as there are other architectures coming along with the same requirements. [akpm@linux-foundation.org: build fixes] Signed-off-by: Thomas Gleixner Cc: Chris Wright Cc: Ingo Molnar Cc: john stultz Cc: David Miller Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index ec3b0ced0afe..e6aea5146e5d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -4,6 +4,7 @@ #include #ifdef __KERNEL__ +# include # include #endif @@ -94,6 +95,8 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock __attribute__((weak)); extern unsigned long read_persistent_clock(void); +extern int update_persistent_clock(struct timespec now); +extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); static inline unsigned long get_seconds(void) -- cgit v1.2.3 From ae2c6dcf90c5a9ff9bd9a176cafd43a255fcc64b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sat, 21 Jul 2007 17:09:56 +0200 Subject: x86_64: various cleanups in NUMA scan node In acpi_scan_nodes(), we immediately return -1 if acpi_numa <= 0, meaning we haven't detected any underlying ACPI topology or we have explicitly disabled its use from the command-line with numa=noacpi. acpi_table_print_srat_entry() and acpi_table_parse_srat() are only referenced within drivers/acpi/numa.c, so we can mark them as static and remove their prototypes from the header file. Likewise, pxm_to_node_map[] and node_to_pxm_map[] are only used within drivers/acpi/numa.c, so we mark them as static and remove their externs from the header file. The automatic 'result' variable is unused in acpi_numa_init(), so it's removed. Signed-off-by: David Rientjes Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dc234c508a6f..e88b62e6b3aa 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -88,10 +88,8 @@ int acpi_table_parse (char *id, acpi_table_handler handler); int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_table_entry_handler handler, unsigned int max_entries); int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler handler, unsigned int max_entries); -int acpi_table_parse_srat (enum acpi_srat_type id, acpi_table_entry_handler handler, unsigned int max_entries); int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -void acpi_table_print_srat_entry (struct acpi_subtable_header *srat); /* the following four functions are architecture-dependent */ #ifdef CONFIG_HAVE_ARCH_PARSE_SRAT -- cgit v1.2.3 From a586df067afe0580bb02b7a6312ca2afe49bba03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 21 Jul 2007 17:10:00 +0200 Subject: x86: Support __attribute__((__cold__)) in gcc 4.3 gcc 4.3 supports a new __attribute__((__cold__)) to mark functions cold. Any path directly leading to a call of this function will be unlikely. And gcc will try to generate smaller code for the function itself. Please use with care. The code generation advantage isn't large and in most cases it is not worth uglifying code with this. This patch marks some common error functions like panic(), printk() as cold. This will longer term make many unlikely()s unnecessary, although we can keep them for now for older compilers. BUG is not marked cold because there is currently no way to tell gcc to mark a inline function told. Also all __init and __exit functions are marked cold. With a non -Os build this will tell the compiler to generate slightly smaller code for them. I think it currently only uses less alignments for labels, but that might change in the future. One disadvantage over *likely() is that they cannot be easily instrumented to verify them. Another drawback is that only the latest gcc 4.3 snapshots support this. Unfortunately we cannot detect this using the preprocessor. This means older snapshots will fail now. I don't think that's a problem because they are unreleased compilers that nobody should be using. gcc also has a __hot__ attribute, but I don't see any sense in using this in the kernel right now. But someday I hope gcc will be able to use more aggressive optimizing for hot functions even in -Os, if that happens it should be added. Includes compile fix from Thomas Gleixner. Cc: Jan Hubicka Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc4.h | 18 ++++++++++++++++++ include/linux/compiler.h | 9 +++++++++ include/linux/init.h | 8 ++++---- include/linux/kernel.h | 8 ++++---- 4 files changed, 35 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a03e9398a6c2..14f7494280f0 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -23,3 +23,21 @@ * code */ #define uninitialized_var(x) x = x + +#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3) +/* Mark functions as cold. gcc will assume any path leading to a call + to them will be unlikely. This means a lot of manual unlikely()s + are unnecessary now for any paths leading to the usual suspects + like BUG(), printk(), panic() etc. [but let's keep them for now for + older compilers] + + Early snapshots of gcc 4.3 don't support this and we can't detect this + in the preprocessor, but we can live with this because they're unreleased. + Maketime probing would be overkill here. + + gcc also has a __attribute__((__hot__)) to move hot functions into + a special section, but I don't see any sense in this right now in + the kernel context */ +#define __cold __attribute__((__cold__)) + +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8287a72bb6a9..12a1291855e2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -174,4 +174,13 @@ extern void __chk_io_ptr(const void __iomem *); # define __attribute_const__ /* unimplemented */ #endif +/* + * Tell gcc if a function is cold. The compiler will assume any path + * directly leading to the call is unlikely. + */ + +#ifndef __cold +#define __cold +#endif + #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/init.h b/include/linux/init.h index 5b5285316339..f0d0e3295a9b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -40,10 +40,10 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __attribute__ ((__section__ (".init.text"))) +#define __init __attribute__ ((__section__ (".init.text"))) __cold #define __initdata __attribute__ ((__section__ (".init.data"))) #define __exitdata __attribute__ ((__section__(".exit.data"))) -#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) +#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold /* modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a @@ -59,9 +59,9 @@ #define __initdata_refok __attribute__ ((__section__ (".data.init.refok"))) #ifdef MODULE -#define __exit __attribute__ ((__section__(".exit.text"))) +#define __exit __attribute__ ((__section__(".exit.text"))) __cold #else -#define __exit __attribute_used__ __attribute__ ((__section__(".exit.text"))) +#define __exit __attribute_used__ __attribute__ ((__section__(".exit.text"))) __cold #endif /* For assembly routines */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1eb9cde550c4..4300bb462d29 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -106,7 +106,7 @@ extern int cond_resched(void); extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) - __attribute__ ((NORET_AND format (printf, 1, 2))); + __attribute__ ((NORET_AND format (printf, 1, 2))) __cold; extern void oops_enter(void); extern void oops_exit(void); extern int oops_may_print(void); @@ -155,14 +155,14 @@ extern void dump_thread(struct pt_regs *regs, struct user *dump); asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) - __attribute__ ((format (printf, 1, 2))); + __attribute__ ((format (printf, 1, 2))) __cold; #else static inline int vprintk(const char *s, va_list args) __attribute__ ((format (printf, 1, 0))); static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); -static inline int printk(const char *s, ...) { return 0; } +static inline int __cold printk(const char *s, ...) { return 0; } #endif unsigned long int_sqrt(unsigned long); @@ -212,7 +212,7 @@ extern enum system_states { #define TAINT_USER (1<<6) #define TAINT_DIE (1<<7) -extern void dump_stack(void); +extern void dump_stack(void) __cold; enum { DUMP_PREFIX_NONE, -- cgit v1.2.3 From 3484d79813707bb6045773953a809abba443dc20 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sat, 21 Jul 2007 17:10:32 +0200 Subject: x86_64: fake pxm-to-node mapping for fake numa For NUMA emulation, our SLIT should represent the true NUMA topology of the system but our proximity domain to node ID mapping needs to reflect the emulated state. When NUMA emulation has successfully setup fake nodes on the system, a new function, acpi_fake_nodes() is called. This function determines the proximity domain (_PXM) for each true node found on the system. It then finds which emulated nodes have been allocated on this true node as determined by its starting address. The node ID to PXM mapping is changed so that each fake node ID points to the PXM of the true node that it is located on. If the machine failed to register a SLIT, then we assume there is no special requirement for emulated node affinity so we use the default LOCAL_DISTANCE, which is newly exported to this code, as our measurement if the emulated nodes appear in the same PXM. Otherwise, we use REMOTE_DISTANCE. PXM_INVAL and NID_INVAL are also exported to the ACPI header file so that we can compare node_to_pxm() results in generic code (in this case, the SRAT code). Cc: Len Brown Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e88b62e6b3aa..d5680cd7746a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -231,6 +231,9 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; +#define PXM_INVAL (-1) +#define NID_INVAL (-1) + #else /* CONFIG_ACPI */ static inline int acpi_boot_init(void) -- cgit v1.2.3 From 44bf4cea43816d43deab73c1c16361e899996eaa Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Sat, 21 Jul 2007 17:10:41 +0200 Subject: x86: PM_TRACE support Signed-off-by: Nigel Cunningham Cc: Randy Dunlap Cc: "Rafael J. Wysocki" Cc: Pavel Machek Acked-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/resume-trace.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index 81e9299ca148..f3f4f28c6960 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -2,6 +2,7 @@ #define RESUME_TRACE_H #ifdef CONFIG_PM_TRACE +#include extern int pm_trace_enabled; @@ -9,20 +10,10 @@ struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(void *tracedata, unsigned int user); -#define TRACE_DEVICE(dev) set_trace_device(dev) -#define TRACE_RESUME(user) do { \ - if (pm_trace_enabled) { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ - } \ -} while (0) +#define TRACE_DEVICE(dev) do { \ + if (pm_trace_enabled) \ + set_trace_device(dev); \ + } while(0) #else -- cgit v1.2.3 From 9585116ba09f1d8c52d0a1346e20bb9d443e9c02 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Jul 2007 17:11:35 +0200 Subject: i386: fix iounmap's use of vm_struct's size field get_vm_area always returns an area with an adjacent guard page. That guard page is included in vm_struct.size. iounmap uses vm_struct.size to determine how much address space needs to have change_page_attr applied to it, which will BUG if applied to the guard page. This patch adds a helper function - get_vm_area_size() in linux/vmalloc.h - to return the actual size of a vm area, and uses it to make iounmap do the right thing. There are probably other places which should be using get_vm_area_size(). Thanks to Dave Young for debugging the problem. [ Andi, it wasn't clear to me whether x86_64 needs the same fix. ] Signed-off-by: Jeremy Fitzhardinge Cc: Dave Young Cc: Chuck Ebbert Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c2b10cae5da5..89338b468d0d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -58,6 +58,13 @@ void vmalloc_sync_all(void); /* * Lowlevel-APIs (not for driver use!) */ + +static inline size_t get_vm_area_size(const struct vm_struct *area) +{ + /* return actual size without guard page */ + return area->size - PAGE_SIZE; +} + extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); -- cgit v1.2.3 From e97e2ddf07d6b6c2d621ddaec277e19f86c0cdb1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 21 Jul 2007 19:07:33 -0700 Subject: [IrDA]: EP7211 IR driver port to the latest SIR API The EP7211 SIR driver was the only one left without a new SIR API port. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/linux/irda.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irda.h b/include/linux/irda.h index 8e3735714c1c..28f88ecba344 100644 --- a/include/linux/irda.h +++ b/include/linux/irda.h @@ -77,6 +77,7 @@ typedef enum { IRDA_ACT200L_DONGLE = 10, IRDA_MA600_DONGLE = 11, IRDA_TOIM3232_DONGLE = 12, + IRDA_EP7211_DONGLE = 13, } IRDA_DONGLE; /* Protocol types to be used for SOCK_DGRAM */ -- cgit v1.2.3 From d5a2f2f1d68e2da538ac28540cddd9ccc733b001 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Wed, 18 Jul 2007 23:45:42 -0300 Subject: ACPI: thinkpad-acpi: store ThinkPad model information Keep note of ThinkPad model, BIOS and EC firmware information, and log it on startup. Makes for far more readable code in places, too. This patch also adds Lenovo's PCI ID to the pci ids table. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b15c6498fe67..ced4d3f76104 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2040,6 +2040,8 @@ #define PCI_DEVICE_ID_ALTIMA_AC9100 0x03ea #define PCI_DEVICE_ID_ALTIMA_AC1003 0x03eb +#define PCI_VENDOR_ID_LENOVO 0x17aa + #define PCI_VENDOR_ID_ARECA 0x17d3 #define PCI_DEVICE_ID_ARECA_1110 0x1110 #define PCI_DEVICE_ID_ARECA_1120 0x1120 -- cgit v1.2.3 From 8bf8df7120006b8c97ad3a9fcc79e2ba894c46dd Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 22 Jul 2007 00:23:03 +1000 Subject: [POWERPC] Constify of_platform_driver name Signed-off-by: Stephen Rothwell Acked-by: David S. Miller Signed-off-by: Paul Mackerras --- include/linux/of_platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 5fd44e63fb26..22c3837784b2 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -31,7 +31,7 @@ extern struct bus_type of_platform_bus_type; */ struct of_platform_driver { - char *name; + const char *name; struct of_device_id *match_table; struct module *owner; -- cgit v1.2.3 From 51d261122d0ffac8cf91cc6e74ffcfea23faeb1c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 22 Jul 2007 00:27:01 +1000 Subject: [POWERPC] Constify of_platform_driver match_table Signed-off-by: Stephen Rothwell Acked-by: David S. Miller Signed-off-by: Paul Mackerras --- include/linux/of_platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 22c3837784b2..448f70b30a0c 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -32,7 +32,7 @@ extern struct bus_type of_platform_bus_type; struct of_platform_driver { const char *name; - struct of_device_id *match_table; + const struct of_device_id *match_table; struct module *owner; int (*probe)(struct of_device* dev, -- cgit v1.2.3 From 41e1703b9b88cf9b5e91cdd2f7dcded3ec3917cb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 22 Jul 2007 10:06:50 +0900 Subject: [SCSI] bsg: unexport sg v3 helper functions blk_fill_sghdr_rq, blk_unmap_sghdr_rq, and blk_complete_sghdr_rq were exported for bsg, however bsg was changed to support only sg v4. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f78965fc6426..695e34964cb7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -698,11 +698,6 @@ extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern int blk_fill_sghdr_rq(request_queue_t *, struct request *, - struct sg_io_hdr *, int); -extern int blk_unmap_sghdr_rq(struct request *, struct sg_io_hdr *); -extern int blk_complete_sghdr_rq(struct request *, struct sg_io_hdr *, - struct bio *); extern int blk_verify_command(unsigned char *, int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3 From 74f2345b6be1410f824cb7dd638d2c10a9709379 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 4 Jun 2007 17:00:14 -0400 Subject: [PATCH] allow audit filtering on bit & operations Right now the audit filter can match on = != > < >= blah blah blah. This allow the filter to also look at bitwise AND operations, & Signed-off-by: Eric Paris Signed-off-by: Al Viro --- include/linux/audit.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8ca7ca0b47f0..a35859ab2fdb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -161,7 +161,7 @@ * are currently used in an audit field constant understood by the kernel. * If you are adding a new #define AUDIT_, please ensure that * AUDIT_UNUSED_BITS is updated if need be. */ -#define AUDIT_UNUSED_BITS 0x0FFFFC00 +#define AUDIT_UNUSED_BITS 0x07FFFC00 /* Rule fields */ @@ -213,25 +213,29 @@ #define AUDIT_NEGATE 0x80000000 /* These are the supported operators. - * 4 2 1 - * = > < - * ------- - * 0 0 0 0 nonsense - * 0 0 1 1 < - * 0 1 0 2 > - * 0 1 1 3 != - * 1 0 0 4 = - * 1 0 1 5 <= - * 1 1 0 6 >= - * 1 1 1 7 all operators + * 4 2 1 8 + * = > < ? + * ---------- + * 0 0 0 0 00 nonsense + * 0 0 0 1 08 & bit mask + * 0 0 1 0 10 < + * 0 1 0 0 20 > + * 0 1 1 0 30 != + * 1 0 0 0 40 = + * 1 0 0 1 48 &= bit test + * 1 0 1 0 50 <= + * 1 1 0 0 60 >= + * 1 1 1 1 78 all operators */ +#define AUDIT_BIT_MASK 0x08000000 #define AUDIT_LESS_THAN 0x10000000 #define AUDIT_GREATER_THAN 0x20000000 #define AUDIT_NOT_EQUAL 0x30000000 #define AUDIT_EQUAL 0x40000000 +#define AUDIT_BIT_TEST (AUDIT_BIT_MASK|AUDIT_EQUAL) #define AUDIT_LESS_THAN_OR_EQUAL (AUDIT_LESS_THAN|AUDIT_EQUAL) #define AUDIT_GREATER_THAN_OR_EQUAL (AUDIT_GREATER_THAN|AUDIT_EQUAL) -#define AUDIT_OPERATORS (AUDIT_EQUAL|AUDIT_NOT_EQUAL) +#define AUDIT_OPERATORS (AUDIT_EQUAL|AUDIT_NOT_EQUAL|AUDIT_BIT_MASK) /* Status symbols */ /* Mask values */ -- cgit v1.2.3 From 4259fa01a2d2aa3e589b34ba7624080232d9c1ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jun 2007 11:13:31 -0400 Subject: [PATCH] get rid of AVC_PATH postponed treatment Selinux folks had been complaining about the lack of AVC_PATH records when audit is disabled. I must admit my stupidity - I assumed that avc_audit() really couldn't use audit_log_d_path() because of deadlocks (== could be called with dcache_lock or vfsmount_lock held). Shouldn't have made that assumption - it never gets called that way. It _is_ called under spinlocks, but not those. Since audit_log_d_path() uses ab->gfp_mask for allocations, kmalloc() in there is not a problem. IOW, the simple fix is sufficient: let's rip AUDIT_AVC_PATH out and simply generate pathname as part of main record. It's trivial to do. Signed-off-by: Al Viro Acked-by: James Morris --- include/linux/audit.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index a35859ab2fdb..4bbd8601b8f0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -411,7 +411,6 @@ extern int audit_bprm(struct linux_binprm *bprm); extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int __audit_fd_pair(int fd1, int fd2); -extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern int audit_set_macxattr(const char *name); extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr); extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout); @@ -491,7 +490,6 @@ extern int audit_signals; #define audit_socketcall(n,a) ({ 0; }) #define audit_fd_pair(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) -#define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_set_macxattr(n) do { ; } while (0) #define audit_mq_open(o,m,a) ({ 0; }) #define audit_mq_timedsend(d,l,p,t) ({ 0; }) -- cgit v1.2.3 From abd4f7505bafdd6c5319fe3cb5caf9af6104e17a Mon Sep 17 00:00:00 2001 From: Masoud Asgharifard Sharbiani Date: Sun, 22 Jul 2007 11:12:28 +0200 Subject: x86: i386-show-unhandled-signals-v3 This patch makes the i386 behave the same way that x86_64 does when a segfault happens. A line gets printed to the kernel log so that tools that need to check for failures can behave more uniformly between debug.show_unhandled_signals sysctl variable to 0 (or by doing echo 0 > /proc/sys/debug/exception-trace) Also, all of the lines being printed are now using printk_ratelimit() to deny the ability of DoS from a local user with a program like the following: main() { while (1) if (!fork()) *(int *)0 = 0; } This new revision also includes the fix that Andrew did which got rid of new sysctl that was added to the system in earlier versions of this. Also, 'show-unhandled-signals' sysctl has been renamed back to the old 'exception-trace' to avoid breakage of people's scripts. AK: Enabling by default for i386 will be likely controversal, but let's see what happens AK: Really folks, before complaining just fix your segfaults AK: I bet this will find a lot of silent issues Signed-off-by: Masoud Sharbiani Signed-off-by: Andi Kleen [ Personally, I've found the complaints useful on x86-64, so I'm all for this. That said, I wonder if we could do it more prettily.. -Linus ] Signed-off-by: Linus Torvalds --- include/linux/signal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ea91abe740da..0ae338866240 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -237,12 +237,15 @@ extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); +extern int show_unhandled_signals; struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); extern struct kmem_cache *sighand_cachep; +int unhandled_signal(struct task_struct *tsk, int sig); + /* * In POSIX a signal is sent either to a specific thread (Linux task) * or to the process as a whole (Linux thread group). How the signal -- cgit v1.2.3 From e9ed7e722e3f4cea07cf3c4bfe98c18180a17793 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Jul 2007 23:29:12 +0100 Subject: take declarations of enable_irq() et.al. to linux/interrupt.h Now that the last inlined instances are gone, all that is left to do is turning disable_irq_nosync on arm26 and m68k from defines to aliases and we are all set - we can make these externs in linux/interrupt.h uncoditional and kill remaining instances in asm/irq.h Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5323f6275854..0a3c2ebf2008 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -120,11 +120,11 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); # define local_irq_enable_in_hardirq() local_irq_enable() #endif -#ifdef CONFIG_GENERIC_HARDIRQS extern void disable_irq_nosync(unsigned int irq); extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); +#ifdef CONFIG_GENERIC_HARDIRQS /* * Special lockdep variants of irq disabling/enabling. * These should be used for locking constructs that -- cgit v1.2.3