From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 23:25:50 +0100 Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion The reiserfs lock -> inode mutex dependency gets inverted when we relax the lock while walking to the tree. To fix this, use a specialized version of reiserfs_mutex_lock_safe that takes care of mutex subclasses. Then we can grab the inode mutex with I_MUTEX_XATTR subclass without any reiserfs lock dependency. This fixes the following report: [ INFO: possible circular locking dependency detected ] 2.6.32-06793-gf405425-dirty #2 ------------------------------------------------------- mv/18566 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x28= /0x40 but task is already holding lock: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}: [] validate_chain+0xa23/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_for_each_xattr+0x84/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] do_unlinkat+0xdb/0x160 [] sys_unlink+0x10/0x20 [] sysenter_do_call+0x12/0x36 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [] validate_chain+0xf68/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] search_by_entry_key+0x1f/0x3b0 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 other info that might help us debug this: 2 locks held by mv/18566: #0: (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [] lock_rename+0xcc/0xd0 #1: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 stack backtrace: Pid: 18566, comm: mv Tainted: G C 2.6.32-06793-gf405425-dirty #2 Call Trace: [] ? printk+0x18/0x1e [] print_circular_bug+0xc0/0xd0 [] validate_chain+0xf68/0xf70 [] ? trace_hardirqs_off+0xb/0x10 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] ? reiserfs_write_lock+0x28/0x40 [] mutex_lock_nested+0x5f/0x2b0 [] ? reiserfs_write_lock+0x28/0x40 [] ? reiserfs_write_lock+0x28/0x40 [] ? schedule+0x27a/0x440 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] ? __lock_acquire+0x506/0xa70 [] ? lock_release_non_nested+0x1e7/0x340 [] ? reiserfs_write_lock+0x28/0x40 [] ? trace_hardirqs_on_caller+0x124/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] ? T.316+0x15/0x1a0 [] ? sched_clock_cpu+0x9d/0x100 [] search_by_entry_key+0x1f/0x3b0 [] ? __mutex_unlock_slowpath+0x9a/0x120 [] ? trace_hardirqs_on_caller+0x124/0x170 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] ? sched_clock_cpu+0x9d/0x100 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] ? delete_one_xattr+0x0/0x1c0 [] ? math_error+0x22/0x150 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_xattrs+0x15/0x50 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_inode+0x8f/0x140 [] ? generic_delete_inode+0x5f/0x150 [] ? reiserfs_delete_inode+0x0/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] ? sched_clock_cpu+0x9d/0x100 [] ? trace_hardirqs_off+0xb/0x10 [] ? cpu_clock+0x4e/0x60 [] ? do_page_fault+0x155/0x370 [] ? up_read+0x16/0x30 [] ? do_page_fault+0x155/0x370 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Cc: Chris Mason Cc: Ingo Molnar Cc: Thomas Gleixner --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..4351b49e2b1e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m, reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Dec 2009 17:52:13 +1100 Subject: PCI/cardbus: Add a fixup hook and fix powerpc The cardbus code creates PCI devices without ever going through the necessary fixup bits and pieces that normal PCI devices go through. There's in fact a commented out call to pcibios_fixup_bus() in there, it's commented because ... it doesn't work. I could make pcibios_fixup_bus() do the right thing on powerpc easily but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus for which a weak empty implementation is provided by the PCI core. This fixes cardbus on powerbooks and probably all other PowerPC platforms which was broken completely for ever on some platforms and since 2.6.31 on others such as PowerBooks when we made the DMA ops mandatory (since those are setup by the fixups). Acked-by: Dominik Brodowski Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e67080849..5da0690d9cee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3 From 4f924ba5b5aaf1477daafeae779495d39549186d Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Thu, 17 Dec 2009 00:08:33 +0900 Subject: sony-laptop: add AVMode key mapping Some models are equipped with an "AVMode" function key that sends sony-laptop: Unknown event: 0x100 0xa1 sony-laptop: Unknown event: 0x100 0x21 for press and release respectively. Cc: "Matthew W. S. Bell" Cc: Dmitry Torokhov Signed-off-by: Mattia Dongili Signed-off-by: Len Brown --- include/linux/sonypi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 34c4475ac4a2..4f95c1aac2fd 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -111,6 +111,7 @@ #define SONYPI_EVENT_VOLUME_INC_PRESSED 69 #define SONYPI_EVENT_VOLUME_DEC_PRESSED 70 #define SONYPI_EVENT_BRIGHTNESS_PRESSED 71 +#define SONYPI_EVENT_MEDIA_PRESSED 72 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) -- cgit v1.2.3 From 8c0414cd524e9f1c483ffb3ff1c2d860f5c567c8 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 3 Dec 2009 12:46:51 -0800 Subject: fiemap: Add new extent flag FIEMAP_EXTENT_SHARED Some filesystems may allow multiple files to point to a particular extent. This patch adds flag FIEMAP_EXTENT_SHARED to denote extents that are shared with other inodes. Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- include/linux/fiemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 934e22d65801..d830747f5c0b 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -62,5 +62,7 @@ struct fiemap { #define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively * support extents. Result * merged for efficiency. */ +#define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other + * files. */ #endif /* _LINUX_FIEMAP_H */ -- cgit v1.2.3 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. */ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..b2304929434e 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..bd27fbc9db62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; -- cgit v1.2.3 From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 29 Dec 2009 08:35:35 +0100 Subject: block: Fix incorrect alignment offset reporting and update documentation queue_sector_alignment_offset returned the wrong value which caused partitions to report an incorrect alignment_offset. Since offset alignment calculation is needed several places it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works. Signed-off-by: Martin K. Petersen Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919aa0d0..59b832be3044 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3 From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 12:05:58 +0100 Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP listeners Control of more than one AV/C device at once --- e.g. camcorders, tape decks, audio devices, TV tuners --- failed or worked only unreliably, depending on driver implementation. This affected kernelspace and userspace drivers alike and was caused by firewire-core's inability to accept multiple registrations of FCP listeners. The fix allows multiple address handlers to be registered for the FCP command and response registers. When a request for these registers is received, all handlers are invoked, and the Firewire response is generated by the core and not by any handler. The cdev API does not change, i.e., userspace is still expected to send a response for FCP requests; this response is silently ignored. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter (changelog, rebased, whitespace) --- include/linux/firewire-cdev.h | 3 +++ include/linux/firewire.h | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index c6b3ca3af6df..1f716d9f714b 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -340,6 +340,9 @@ struct fw_cdev_send_response { * The @closure field is passed back to userspace in the response event. * The @handle field is an out parameter, returning a handle to the allocated * range to be used for later deallocation of the range. + * + * The address range is allocated on all local nodes. The address allocation + * is exclusive except for the FCP command and response registers. */ struct fw_cdev_allocate { __u64 offset; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9416a461b696..a0e67150a729 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, void *data, size_t length, void *callback_data); /* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. + * Important note: Except for the FCP registers, the callback must guarantee + * that either fw_send_response() or kfree() is called on the @request. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, -- cgit v1.2.3 From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 30 Dec 2009 08:41:07 +0100 Subject: block: blk_rq_err_sectors cleanup blk_rq_err_sectors() seems useless, get rid of it. Signed-off-by: Gui Jianfeng Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59b832be3044..9b98173a8184 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ -- cgit v1.2.3 From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:26 +0800 Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0 The struct syscall_metadata variable name in SYSCALL_DEFINE0 should be __syscall_meta__##sname instead of __syscall_meta_##sname to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6. This error causes event_enter_##sname->data to point to the wrong location, which causes syscalls which are defined by SYSCALL_DEFINE0() not to be traced. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ -- cgit v1.2.3 From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 26 Dec 2009 17:58:11 +0100 Subject: kfifo: Fix typo in comment It's DECLARE_KFIFO, not DECLARED_KFIFO. Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c65a8e..7c6b32a1421c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ -- cgit v1.2.3 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 30 Dec 2009 08:23:30 +0000 Subject: phylib: Properly reinitialize PHYs after hibernation Since hibernation assumes power loss, we should fully reinitialize PHYs (including platform fixups), as if PHYs were just attached. This patch factors phy_init_hw() out of phy_attach_direct(), then converts mdio_bus to dev_pm_ops and adds an appropriate restore() callback. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b1368b8f6572..7968defd2fa7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_init_hw(struct phy_device *phydev); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3 From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 00:39:22 +0100 Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion i_xattr_sem depends on the reiserfs lock. But after we grab i_xattr_sem, we may relax/relock the reiserfs lock while waiting on a freezed filesystem, creating a dependency inversion between the two locks. In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's create a reiserfs_down_read_safe() that acts like reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing another lock to avoid undesired dependencies induced by the heivyweight reiserfs lock. This fixes the following warning: [ 990.005931] ======================================================= [ 990.012373] [ INFO: possible circular locking dependency detected ] [ 990.013233] 2.6.33-rc1 #1 [ 990.013233] ------------------------------------------------------- [ 990.013233] dbench/1891 is trying to acquire lock: [ 990.013233] (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [ 990.013233] but task is already holding lock: [ 990.013233] (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] which lock already depends on the new lock. [ 990.013233] [ 990.013233] [ 990.013233] the existing dependency chain (in reverse order) is: [ 990.013233] [ 990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}: [ 990.013233] [] __lock_acquire+0xf9c/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] down_write+0x44/0x80 [ 990.013233] [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] other info that might help us debug this: [ 990.013233] [ 990.013233] 2 locks held by dbench/1891: [ 990.013233] #0: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [] vfs_setxattr+0x78/0xc0 [ 990.013233] #1: (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] stack backtrace: [ 990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1 [ 990.013233] Call Trace: [ 990.013233] [] print_circular_bug+0xe9/0xf0 [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? mark_held_locks+0x72/0xa0 [ 990.013233] [] ? __mutex_unlock_slowpath+0xbd/0x140 [ 990.013233] [] ? trace_hardirqs_on_caller+0x14d/0x1a0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] ? __mutex_lock_common+0x284/0x3b0 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] ? sched_clock_cpu+0xb8/0x100 [ 990.013233] [] ? trace_hardirqs_off+0xd/0x10 [ 990.013233] [] ? cpu_clock+0x43/0x50 [ 990.013233] [] ? fget+0xb0/0x110 [ 990.013233] [] ? fget+0x0/0x110 [ 990.013233] [] ? sysret_check+0x27/0x62 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b Reported-and-tested-by: Christian Kujau Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- include/linux/reiserfs_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4351b49e2b1e..35d3f459b0ac 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, reiserfs_write_lock(s); } +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3 From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 03:20:19 +0100 Subject: reiserfs: Warn on lock relax if taken recursively When we relax the reiserfs lock to avoid creating unwanted dependencies against others locks while grabbing these, we want to ensure it has not been taken recursively, otherwise the lock won't be really relaxed. Only its depth will be decreased. The unwanted dependency would then actually happen. To prevent from that, add a reiserfs_lock_check_recursive() call in the places that need it. Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 35d3f459b0ac..793bf8351ab8 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold @@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); @@ -101,6 +108,7 @@ static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock_nested(m, subclass); reiserfs_write_lock(s); @@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); down_read(sem); reiserfs_write_lock(s); -- cgit v1.2.3 From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 Nov 2009 14:16:33 +0100 Subject: resource: move kernel function inside __KERNEL__ It is an internal function. Move it inside __KERNEL__ ifdef, along with task_struct declaration. Then we get: --- /usr/include/linux/resource.h 2009-09-14 15:09:29.000000000 +0200 +++ usr/include/linux/resource.h 2010-01-04 11:30:54.000000000 +0100 @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,5 @@ */ #include -int getrusage(struct task_struct *p, int who, struct rusage *ru); #endif *********** include/linux/Kbuild is untouched, since unifdef is run even on headers-y nowadays. Signed-off-by: Jiri Slaby --- include/linux/resource.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..f1e914eefeab 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,12 @@ struct rlimit { */ #include +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3 From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Nov 2009 17:16:37 +0100 Subject: resource: add helpers for fetching rlimits We want to be sure that compiler fetches the limit variable only once, so add helpers for fetching current and maximal resource limits which do that. Add them to sched.h (instead of resource.h) due to circular dependency sched.h->resource.h->task_struct Alternative would be to create a separate res_access.h or similar. Signed-off-by: Jiri Slaby Cc: James Morris Cc: Heiko Carstens Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03ce..8d4991be9d53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Dec 2009 12:15:54 +0100 Subject: PCI/PM: Use per-device D3 delays It turns out that some PCI devices require extra delays when changing power state from D3 to D0 (and the other way around). Although this is against the PCI specification, we can handle it quite easily by allowing drivers to define arbitrary D3 delays for devices known to require extra time for switching power states. Introduce additional field d3_delay in struct pci_dev and use it to store the value of the device's D0->D3 delay, in miliseconds. Make the PCI PM core code use the per-device d3_delay unless pci_pm_d3_delay is greater (in which case the latter is used). [This also allows the driver to specify d3_delay shorter than the 10 ms required by the PCI standard if the device is known to be able to handle that.] Make the sky2 driver set d3_delay to 150 for devices handled by it. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a listed regression from 2.6.30. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0690d9cee..174e5392e51e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -243,6 +243,7 @@ struct pci_dev { unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ unsigned int wakeup_prepared:1; + unsigned int d3_delay; /* D3->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ -- cgit v1.2.3 From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Jan 2010 17:56:02 -0800 Subject: Input: serio - fix potential deadlock when unbinding drivers sysfs_remove_group() waits for sysfs attributes to be removed, therefore we do not need to worry about driver-specific attributes being accessed after driver has been detached from the device. In fact, attempts to take serio->drv_mutex in attribute methods may lead to the following deadlock: sysfs_read_file() fill_read_buffer() sysfs_get_active_two() psmouse_attr_show_helper() serio_pin_driver() serio_disconnect_driver() mutex_lock(&serio->drv_mutex); <--------> mutex_lock(&serio_drv_mutex); psmouse_disconnect() sysfs_remove_group(... psmouse_attr_group); .... sysfs_deactivate(); wait_for_completion(); Fix this by removing calls to serio_[un]pin_driver() and functions themselves and using driver-private mutexes to serialize access to attribute's set() methods that may change device state. Signed-off-by: Eric W. Biederman Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index e2f3044d4a4a..813d26c247ec 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } -/* - * Use the following functions to pin serio's driver in process context - */ -static inline int serio_pin_driver(struct serio *serio) -{ - return mutex_lock_interruptible(&serio->drv_mutex); -} - -static inline void serio_pin_driver_uninterruptible(struct serio *serio) -{ - mutex_lock(&serio->drv_mutex); -} - -static inline void serio_unpin_driver(struct serio *serio) -{ - mutex_unlock(&serio->drv_mutex); -} - - #endif /* -- cgit v1.2.3 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch(), doesn't care care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- include/linux/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3 From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 6 Jan 2010 17:23:23 +0000 Subject: NOMMU: Avoiding duplicate icache flushes of shared maps When working with FDPIC, there are many shared mappings of read-only code regions between applications (the C library, applet packages like busybox, etc.), but the current do_mmap_pgoff() function will issue an icache flush whenever a VMA is added to an MM instead of only doing it when the map is initially created. The flush can instead be done when a region is first mmapped PROT_EXEC. Note that we may not rely on the first mapping of a region being executable - it's possible for it to be PROT_READ only, so we have to remember whether we've flushed the region or not, and then flush the entire region when a bit of it is made executable. However, this also affects the brk area. That will no longer be executable. We can mprotect() it to PROT_EXEC on MPU-mode kernels, but for NOMMU mode kernels, when it increases the brk allocation, making sys_brk() flush the extra from the icache should suffice. The brk area probably isn't used by NOMMU programs since the brk area can only use up the leavings from the stack allocation, where the stack allocation is larger than requested. Signed-off-by: David Howells Signed-off-by: Mike Frysinger Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84a524afb3dc..84d020bed083 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -123,6 +123,8 @@ struct vm_region { struct file *vm_file; /* the backing file or NULL */ atomic_t vm_usage; /* region usage count */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for + * this region */ }; /* -- cgit v1.2.3 From 65324144b50bc7022cc9b6ca8f4a536a957019e3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:47 +0000 Subject: net: RFC3069, private VLAN proxy arp support This is to be used together with switch technologies, like RFC3069, that where the individual ports are not allowed to communicate with each other, but they are allowed to talk to the upstream router. As described in RFC 3069, it is possible to allow these hosts to communicate through the upstream router by proxy_arp'ing. This patch basically allow proxy arp replies back to the same interface (from which the ARP request/solicitation was received). Tunable per device via proc "proxy_arp_pvlan": /proc/sys/net/ipv4/conf/*/proxy_arp_pvlan This switch technology is known by different vendor names: - In RFC 3069 it is called VLAN Aggregation. - Cisco and Allied Telesyn call it Private VLAN. - Hewlett-Packard call it Source-Port filtering or port-isolation. - Ericsson call it MAC-Forced Forwarding (RFC Draft). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..9a8c57467d3d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -88,6 +88,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..24ff7e3a0d59 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_PROXY_ARP_PVLAN=24, __NET_IPV4_CONF_MAX }; -- cgit v1.2.3 From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 7 Jan 2010 11:58:36 -0600 Subject: maccess,probe_kernel: Allow arch specific override probe_kernel_(read|write) Some archs such as blackfin, would like to have an arch specific probe_kernel_read() and probe_kernel_write() implementation which can fall back to the generic implementation if no special operations are needed. CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Mike Frysinger --- include/linux/uaccess.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..d512d98dfb7d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, void *src, size_t size); +extern long __probe_kernel_read(void *dst, void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3 From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Jan 2010 11:58:37 -0600 Subject: kgdb: Fix kernel-doc format error in kgdb.h linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description' and fix typos in that file's kernel-doc comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6adcc297e354..19ec41a183f5 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -29,8 +29,7 @@ struct pt_regs; * * On some architectures it is required to skip a breakpoint * exception when it occurs after a breakpoint has been removed. - * This can be implemented in the architecture specific portion of - * for kgdb. + * This can be implemented in the architecture specific portion of kgdb. */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); @@ -65,7 +64,7 @@ struct uart_port; /** * kgdb_breakpoint - compiled in breakpoint * - * This will be impelmented a static inline per architecture. This + * This will be implemented as a static inline per architecture. This * function is called by the kgdb core to execute an architecture * specific trap to cause kgdb to enter the exception processing. * @@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed + * and get them into a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, * in case of MIPS, smp_call_function() is used to roundup CPUs. In -- cgit v1.2.3 From 3c9732c06879d85f2fdf7ec69198c1d78da42a98 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 6 Jan 2010 23:07:13 +0000 Subject: stmmac: add the new Header file for stmmac platform data Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/stmmac.h (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h new file mode 100644 index 000000000000..32bfd1a8a48d --- /dev/null +++ b/include/linux/stmmac.h @@ -0,0 +1,53 @@ +/******************************************************************************* + + Header file for stmmac platform data + + Copyright (C) 2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro +*******************************************************************************/ + +#ifndef __STMMAC_PLATFORM_DATA +#define __STMMAC_PLATFORM_DATA + +/* platfrom data for platfrom device structure's platfrom_data field */ + +/* Private data for the STM on-board ethernet driver */ +struct plat_stmmacenet_data { + int bus_id; + int pbl; + int has_gmac; + void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*bus_setup)(unsigned long ioaddr); +#ifdef CONFIG_STM_DRIVERS + struct stm_pad_config *pad_config; +#endif + void *bsp_priv; +}; + +struct plat_stmmacphy_data { + int bus_id; + int phy_addr; + unsigned int phy_mask; + int interface; + int (*phy_reset)(void *priv); + void *priv; +}; +#endif + -- cgit v1.2.3 From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:48 -0500 Subject: block: Fix discard alignment calculation and printing Discard alignment reporting for partitions was incorrect. Update to match the algorithm used elsewhere. The alignment can be negative (misaligned). Fix format string accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b98173a8184..a41bcc8e140f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_sector_discard_alignment(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.discard_alignment) - & (q->limits.discard_granularity - 1); + struct queue_limits *lim = &q->limits; + unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + + return (lim->discard_granularity + lim->discard_alignment - alignment) + & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -- cgit v1.2.3 From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:49 -0500 Subject: block: bdev_stack_limits wrapper DM does not want to know about partition offsets. Add a partition-aware wrapper that DM can use when stacking block devices. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Reviewed-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a41bcc8e140f..5c8018977efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); +extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3 From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001 From: Kirill Afonshin Date: Fri, 8 Jan 2010 22:09:59 +0300 Subject: block: removed unused as_io_context It isn't used anymore, since AS was deleted. Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a63235996309..78ef023227d4 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -4,32 +4,6 @@ #include #include -/* - * This is the per-process anticipatory I/O scheduler state. - */ -struct as_io_context { - spinlock_t lock; - - void (*dtor)(struct as_io_context *aic); /* destructor */ - void (*exit)(struct as_io_context *aic); /* called on task exit */ - - unsigned long state; - atomic_t nr_queued; /* queued reads & sync writes */ - atomic_t nr_dispatched; /* number of requests gone to the drivers */ - - /* IO History tracking */ - /* Thinktime */ - unsigned long last_end_request; - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; - /* Layout pattern */ - unsigned int seek_samples; - sector_t last_request_pos; - u64 seek_total; - sector_t seek_mean; -}; - struct cfq_queue; struct cfq_io_context { void *key; @@ -78,7 +52,6 @@ struct io_context { unsigned long last_waited; /* Time last woken after wait for request */ int nr_batch_requests; /* Number of requests left in the batch */ - struct as_io_context *aic; struct radix_tree_root radix_root; struct hlist_head cic_list; void *ioc_data; -- cgit v1.2.3 From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jan 2010 15:45:55 -0800 Subject: genhd: overlapping variable definition This fixes the sparse warning: fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one fs/ext4/super.c:2368:22: originally declared here Using 'i' in a macro is dubious practice. Signed-off-by: Stephen Hemminger Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c6c0c41af35f..9717081c75ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr((part)->dkstats, i)->field; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) -- cgit v1.2.3 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3 From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 8 Jan 2010 14:42:35 -0800 Subject: kmemcheck: make bitfield annotations truly no-ops when disabled It turns out that even zero-sized struct members (int foo[0];) will affect the struct layout, causing us in particular to lose 4 bytes in struct sock. This patch fixes the regression in CONFIG_KMEMCHECK=n case. Reported-by: Eric Dumazet Signed-off-by: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..08d7dc4ddf40 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address); bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); +/* + * Bitfield annotations + * + * How to use: If you have a struct using bitfields, for example + * + * struct a { + * int x:8, y:8; + * }; + * + * then this should be rewritten as + * + * struct a { + * kmemcheck_bitfield_begin(flags); + * int x:8, y:8; + * kmemcheck_bitfield_end(flags); + * }; + * + * Now the "flags_begin" and "flags_end" members may be used to refer to the + * beginning and end, respectively, of the bitfield (and things like + * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- + * fields should be annotated: + * + * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); + * kmemcheck_annotate_bitfield(a, flags); + */ +#define kmemcheck_bitfield_begin(name) \ + int name##_begin[0]; + +#define kmemcheck_bitfield_end(name) \ + int name##_end[0]; + +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + int _n; \ + \ + if (!ptr) \ + break; \ + \ + _n = (long) &((ptr)->name##_end) \ + - (long) &((ptr)->name##_begin); \ + MAYBE_BUILD_BUG_ON(_n < 0); \ + \ + kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ + } while (0) + +#define kmemcheck_annotate_variable(var) \ + do { \ + kmemcheck_mark_initialized(&(var), sizeof(var)); \ + } while (0) \ + #else #define kmemcheck_enabled 0 @@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) return true; } -#endif /* CONFIG_KMEMCHECK */ - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - * - * Note: We provide the same definitions for both kmemcheck and non- - * kmemcheck kernels. This makes it harder to introduce accidental errors. It - * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield(). - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; +#define kmemcheck_bitfield_begin(name) +#define kmemcheck_bitfield_end(name) +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + } while (0) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ +#define kmemcheck_annotate_variable(var) \ + do { \ } while (0) -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ +#endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3 From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:42 -0800 Subject: lib: add support for LZO-compressed kernels This patch series adds generic support for creating and extracting LZO-compressed kernel images, as well as support for using such images on the x86 and ARM architectures, and support for creating and using LZO-compressed initrd and initramfs images. Russell King said: : Testing on a Cortex A9 model: : - lzo decompressor is 65% of the time gzip takes to decompress a kernel : - lzo kernel is 9% larger than a gzip kernel : : which I'm happy to say confirms your figures when comparing the two. : : However, when comparing your new gzip code to the old gzip code: : - new is 99% of the size of the old code : - new takes 42% of the time to decompress than the old code : : What this means is that for a proper comparison, the results get even better: : - lzo is 7.5% larger than the old gzip'd kernel image : - lzo takes 28% of the time that the old gzip code took : : So the expense seems definitely worth the effort. The only reason I : can think of ever using gzip would be if you needed the additional : compression (eg, because you have limited flash to store the image.) : : I would argue that the default for ARM should therefore be LZO. This patch: The lzo compressor is worse than gzip at compression, but faster at extraction. Here are some figures for an ARM board I'm working on: Uncompressed size: 3.24Mo gzip 1.61Mo 0.72s lzo 1.75Mo 0.48s So for a compression ratio that is still relatively close to gzip, it's much faster to extract, at least in that case. This part contains: - Makefile routine to support lzo compression - Fixes to the existing lzo compressor so that it can be used in compressed kernels - wrapper around the existing lzo1x_decompress, as it only extracts one block at a time, while we need to extract a whole file here - config dialog for kernel compression [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: cleanup] Signed-off-by: Albin Tonnerre Tested-by: Wu Zhangjin Acked-by: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/unlzo.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/decompress/unlzo.h (limited to 'include/linux') diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h new file mode 100644 index 000000000000..987229752519 --- /dev/null +++ b/include/linux/decompress/unlzo.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZO_H +#define DECOMPRESS_UNLZO_H + +int unlzo(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif -- cgit v1.2.3 From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 8 Jan 2010 14:43:08 -0800 Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/adp5588.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index fc5db826b48e..02c9af374741 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data { unsigned short unlock_key2; /* Unlock Key 2 */ }; +struct adp5588_gpio_platform_data { + unsigned gpio_start; /* GPIO Chip base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + #endif -- cgit v1.2.3 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- include/linux/poison.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3 From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jan 2010 16:28:01 -0800 Subject: tcp: Generalized TTL Security Mechanism This patch adds the kernel portions needed to implement RFC 5082 Generalized TTL Security Mechanism (GTSM). It is a lightweight security measure against forged packets causing DoS attacks (for BGP). This is already implemented the same way in BSD kernels. For the necessary Quagga patch http://www.gossamer-threads.com/lists/quagga/dev/17389 Description from Cisco http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html It does add one byte to each socket structure, but I did a little rearrangement to reuse a hole (on 64 bit), but it does grow the structure on 32 bit This should be documented on ip(4) man page and the Glibc in.h file also needs update. IPV6_MINHOPLIMIT should also be added (although BSD doesn't support that). Only TCP is supported, but could also be added to UDP, DCCP, SCTP if desired. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/in.h b/include/linux/in.h index b615649db129..583c76f9c30f 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -84,6 +84,8 @@ struct in_addr { #define IP_ORIGDSTADDR 20 #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR +#define IP_MINTTL 21 + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ -- cgit v1.2.3 From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Dec 2009 13:35:52 +0100 Subject: drbd: Allow online resizing of DRBD devices while peer not reachable (needs to be explicitly forced) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_nl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index db5721ad50d1..a4d82f895994 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, ) NL_PACKET(resize, 7, NL_INT64( 29, T_MAY_IGNORE, resize_size) + NL_BIT( 68, T_MAY_IGNORE, resize_force) ) NL_PACKET(syncer_conf, 8, -- cgit v1.2.3 From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 12 Jan 2010 10:09:07 +0100 Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e84f4733cb55..78962272338a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.6" +#define REL_VERSION "8.3.7" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 91 -- cgit v1.2.3 From 3ccd4c6167d3b39d52631767ebbf8b5677c5855d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2010 02:00:46 -0800 Subject: can: Unify droping of invalid tx skbs and netdev stats To prevent the CAN drivers to operate on invalid socketbuffers the skbs are now checked and silently dropped at the xmit-function consistently. Also the netdev stats are consistently using the CAN data length code (dlc) for [rx|tx]_bytes now. Signed-off-by: Oliver Hartkopp Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- include/linux/can/dev.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3db7767d2a17..7e7c98a3e908 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -60,6 +60,21 @@ struct can_priv { */ #define get_can_dlc(i) (min_t(__u8, (i), 8)) +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +static inline int can_dropped_invalid_skb(struct net_device *dev, + struct sk_buff *skb) +{ + const struct can_frame *cf = (struct can_frame *)skb->data; + + if (unlikely(skb->len != sizeof(*cf) || cf->can_dlc > 8)) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return 1; + } + + return 0; +} + struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); -- cgit v1.2.3 From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Turek?= <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:47 +0100 Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11 Coverage Class, which depends on maximum distance of nodes in a wireless network. It's required for long distance links (more than a few hundred meters). The attribute is now ignored by two non-mac80211 drivers, rndis and iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split set_wiphy_params callback or add new capability bits. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2bfbe88837ef..d4c556de7170 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -430,6 +430,8 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length * larger than or equal to this use RTS/CTS handshake); allowed range: * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -779,6 +781,8 @@ enum nl80211_attrs { NL80211_ATTR_COOKIE, + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.3 From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:45 +0200 Subject: nl80211: New command for setting TX rate mask for rate control Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related attributes to provide support for setting TX rate mask for rate control. This uses the existing cfg80211 set_bitrate_mask operation that was previously used only with WEXT compat code (SIOCSIWRATE). The nl80211 command allows more generic configuration of allowed rates as a mask instead of fixed/max rate. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d4c556de7170..7a1c8c145b22 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,6 +295,10 @@ * This command is also used as an event to notify when a requested * remain-on-channel duration has expired. * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -381,6 +385,8 @@ enum nl80211_commands { NL80211_CMD_REMAIN_ON_CHANNEL, NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + NL80211_CMD_SET_TX_BITRATE_MASK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -640,6 +646,13 @@ enum nl80211_commands { * * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. * + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -783,6 +796,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_COVERAGE_CLASS, + NL80211_ATTR_TX_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1482,4 +1497,33 @@ enum nl80211_key_attributes { NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 }; +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ - 2.4 GHz ISM band + * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:19 +0200 Subject: mac80211: add functions to create PS Poll and Nullfunc templates Some hardware, for example wl1251 and wl1271, handle the transmission of power save related frames in hardware, but the driver is responsible for creating the templates. It's better to create the templates in mac80211, that way all drivers can benefit from this. Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get() which drivers need to call to get the frame. Drivers are also responsible for updating the templates after each association. Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it easy to calculate length of the Nullfunc frame. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aeea282bd2fe..602c0692c3fc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -130,6 +130,15 @@ struct ieee80211_hdr { u8 addr4[6]; } __attribute__ ((packed)); +struct ieee80211_hdr_3addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 16:19:24 +0200 Subject: cfg80211: Store IEs from both Beacon and Probe Response frames Store information elements from Beacon and Probe Response frames in separate buffers to allow both sets to be made available through nl80211. This allows user space applications to get access to IEs from Beacon frames even if we have received Probe Response frames from the BSS. Previously, the IEs from Probe Response frames would have overridden the IEs from Beacon frames. This feature is of somewhat limited use since most protocols include the same (or extended) information in Probe Response frames. However, there are couple of exceptions where the IEs from Beacon frames could be of some use: TIM IE is only included in Beacon frames (and it would be needed to figure out the DTIM period used in the BSS) and at least some implementations of Wireless Provisioning Services seem to include the full IE only in Beacon frames). The new BSS attribute for scan results is added to allow both the IE sets to be delivered. This is done in a way that maintains the previously used behavior for applications that are not aware of the new NL80211_BSS_BEACON_IES attribute. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7a1c8c145b22..127a73015760 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1378,13 +1378,20 @@ enum nl80211_channel_type { * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the - * raw information elements from the probe response/beacon (bin) + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are + * from a Probe Response frame; otherwise they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) * @NL80211_BSS_STATUS: status, if this BSS is "used" * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1400,6 +1407,7 @@ enum nl80211_bss { NL80211_BSS_SIGNAL_UNSPEC, NL80211_BSS_STATUS, NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, /* keep last */ __NL80211_BSS_AFTER_LAST, -- cgit v1.2.3 From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:31 +0200 Subject: mac80211: add U-APSD client support Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. The idea is that the data frames from the client trigger AP to send the buffered frames with ACs which have U-APSD enabled. This decreases latency and makes it possible to save even more power. Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current implementation assumes that firmware takes care of the wakeup and hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported. Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running various test traffic with ping. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 602c0692c3fc..a8c6069a0d9f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -120,6 +120,24 @@ #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 +/* U-APSD queue for WMM IEs sent by AP */ +#define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) + +/* U-APSD queues for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI (1<<1) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK (1<<2) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE (1<<3) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK 0x0f + +/* U-APSD max SP length for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 0x00 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2 0x01 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4 0x02 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; -- cgit v1.2.3 From 558a6669d7cb407fbb0b5aec184b5c3b9a893d30 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:43:00 +0200 Subject: ieee80211: add struct ieee80211_hdr_qos The header can be used to create qos nullfunc frames, for example. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a8c6069a0d9f..842701906ae9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -157,6 +157,16 @@ struct ieee80211_hdr_3addr { __le16 seq_ctrl; } __attribute__ ((packed)); +struct ieee80211_qos_hdr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; + __le16 qos_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Jan 2010 11:14:44 +0900 Subject: libata: retry link resume if necessary Interestingly, when SIDPR is used in ata_piix, writes to DET in SControl sometimes get ignored leading to detection failure. Update sata_link_resume() such that it reads back SControl after clearing DET and retry if it's not clear. Signed-off-by: Tejun Heo Reported-by: fengxiangjun Reported-by: Jim Faulkner Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- include/linux/libata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 6a9c4ddd3d95..73112250862c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -354,6 +354,9 @@ enum { /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, + /* sometimes resuming a link requires several retries */ + ATA_LINK_RESUME_TRIES = 5, + /* how hard are we gonna try to probe/recover devices */ ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, -- cgit v1.2.3 From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2010 17:39:16 +1100 Subject: lib: Introduce generic list_sort function There are two copies of list_sort() in the tree already, one in the DRM code, another in ubifs. Now XFS needs this as well. Create a generic list_sort() function from the ubifs version and convert existing users to it so we don't end up with yet another copy in the tree. Signed-off-by: Dave Chinner Acked-by: Dave Airlie Acked-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- include/linux/list_sort.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 include/linux/list_sort.h (limited to 'include/linux') diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h new file mode 100644 index 000000000000..1a2df2efb771 --- /dev/null +++ b/include/linux/list_sort.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include + +struct list_head; + +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)); +#endif -- cgit v1.2.3 From 508e14b4a4fb1a824a14f2c5b8d7df67b313f8e4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2010 14:27:30 +0000 Subject: netpoll: allow execution of multiple rx_hooks per interface Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netpoll.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 2524267210d3..a765ea898549 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -21,15 +21,20 @@ struct netpoll { __be32 local_ip, remote_ip; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + + struct list_head rx; /* rx_np list element */ }; struct netpoll_info { atomic_t refcnt; + int rx_flags; spinlock_t rx_lock; - struct netpoll *rx_np; /* netpoll that registered an rx_hook */ + struct list_head rx_np; /* netpolls that registered an rx_hook */ + struct sk_buff_head arp_tx; /* list of arp requests to reply to */ struct sk_buff_head txq; + struct delayed_work tx_work; }; @@ -51,7 +56,7 @@ static inline int netpoll_rx(struct sk_buff *skb) unsigned long flags; int ret = 0; - if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) + if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags)) return 0; spin_lock_irqsave(&npinfo->rx_lock, flags); @@ -67,7 +72,7 @@ static inline int netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = skb->dev->npinfo; - return npinfo && (npinfo->rx_np || npinfo->rx_flags); + return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); } static inline int netpoll_receive_skb(struct sk_buff *skb) -- cgit v1.2.3 From 9a58a80a701bdb2d220cdab4914218df5b48d781 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Jan 2010 03:10:54 -0800 Subject: proc_fops: convert drivers/isdn/ to seq_file Convert code away from ->read_proc/->write_proc interfaces. Switch to proc_create()/proc_create_data() which make addition of proc entries reliable wrt NULL ->proc_fops, NULL ->data and so on. Problem with ->read_proc et al is described here commit 786d7e1612f0b0adb6046f19b906609e4fe8b1ba "Fix rmmod/read/write races in /proc entries" [akpm@linux-foundation.org: CONFIG_PROC_FS=n build fix] Signed-off-by: Alexey Dobriyan Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- include/linux/isdn/capilli.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h index 7acb87a44872..d3e5e9da0c82 100644 --- a/include/linux/isdn/capilli.h +++ b/include/linux/isdn/capilli.h @@ -50,8 +50,7 @@ struct capi_ctr { u16 (*send_message)(struct capi_ctr *, struct sk_buff *skb); char *(*procinfo)(struct capi_ctr *); - int (*ctr_read_proc)(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *card); + const struct file_operations *proc_fops; /* filled in before calling ready callback */ u8 manu[CAPI_MANUFACTURER_LEN]; /* CAPI_GET_MANUFACTURER */ -- cgit v1.2.3 From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2009 06:58:56 -0500 Subject: Fix ACC_MODE() for real commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather nasty mess: definitions of ACC_MODE used to be different. Fixed the resulting breakage, converting them to variant that takes O_... value; all callers have that and it actually simplifies life (see tomoyo part of changes). Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9147ca88f253..b1bcb275b596 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write, int __init get_filesystem_list(char *buf); -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) #endif /* __KERNEL__ */ -- cgit v1.2.3 From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:55 +0800 Subject: lib: Introduce strnstr() It differs strstr() in that it limits the length to be searched in the first string. Signed-off-by: Li Zefan LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/string.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 651839a2a755..a716ee2a8adb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str) } #ifndef __HAVE_ARCH_STRSTR -extern char * strstr(const char *,const char *); +extern char * strstr(const char *, const char *); +#endif +#ifndef __HAVE_ARCH_STRNSTR +extern char * strnstr(const char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); -- cgit v1.2.3 From ad72c347e56bf3a0231b9d686e17764157d2961c Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Thu, 14 Jan 2010 07:08:34 +0000 Subject: can: Proper ctrlmode handling for CAN devices This patch adds error checking of ctrlmode values for CAN devices. As an example all availabe bits are implemented in the mcp251x driver. Signed-off-by: Christian Pellegrin Acked-by: Wolfgang Grandegger Signed-off-by: David S. Miller --- include/linux/can/dev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7e7c98a3e908..c8c660a79f90 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,7 @@ struct can_priv { enum can_state state; u32 ctrlmode; + u32 ctrlmode_supported; int restart_ms; struct timer_list restart_timer; -- cgit v1.2.3 From 05c2828c72c4eabf62376adfe27bd24797621f62 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:09 +0000 Subject: tun: export underlying socket Tun device looks similar to a packet socket in that both pass complete frames from/to userspace. This patch fills in enough fields in the socket underlying tun driver to support sendmsg/recvmsg operations, and message flags MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket to modules. Regular read/write behaviour is unchanged. This way, code using raw sockets to inject packets into a physical device, can support injecting packets into host network stack almost without modification. First user of this interface will be vhost virtualization accelerator. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Acked-by: David S. Miller Signed-off-by: David S. Miller --- include/linux/if_tun.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3f5fd523b49d..404abe00162c 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -86,4 +86,18 @@ struct tun_filter { __u8 addr[0][ETH_ALEN]; }; +#ifdef __KERNEL__ +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) +struct socket *tun_get_socket(struct file *); +#else +#include +#include +struct file; +struct socket; +static inline struct socket *tun_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_TUN */ +#endif /* __KERNEL__ */ #endif /* __IF_TUN_H */ -- cgit v1.2.3 From 3a4d5c94e959359ece6d6b55045c3f046677f55c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:27 +0000 Subject: vhost_net: a kernel-level virtio server What it is: vhost net is a character device that can be used to reduce the number of system calls involved in virtio networking. Existing virtio net code is used in the guest without modification. There's similarity with vringfd, with some differences and reduced scope - uses eventfd for signalling - structures can be moved around in memory at any time (good for migration, bug work-arounds in userspace) - write logging is supported (good for migration) - support memory table and not just an offset (needed for kvm) common virtio related code has been put in a separate file vhost.c and can be made into a separate module if/when more backends appear. I used Rusty's lguest.c as the source for developing this part : this supplied me with witty comments I wouldn't be able to write myself. What it is not: vhost net is not a bus, and not a generic new system call. No assumptions are made on how guest performs hypercalls. Userspace hypervisors are supported as well as kvm. How it works: Basically, we connect virtio frontend (configured by userspace) to a backend. The backend could be a network device, or a tap device. Backend is also configured by userspace, including vlan/mac etc. Status: This works for me, and I haven't see any crashes. Compared to userspace, people reported improved latency (as I save up to 4 system calls per packet), as well as better bandwidth and CPU utilization. Features that I plan to look at in the future: - mergeable buffers - zero copy - scalability tuning: figure out the best threading model to use Note on RCU usage (this is also documented in vhost.h, near private_pointer which is the value protected by this variant of RCU): what is happening is that the rcu_dereference() is being used in a workqueue item. The role of rcu_read_lock() is taken on by the start of execution of the workqueue item, of rcu_read_unlock() by the end of execution of the workqueue item, and of synchronize_rcu() by flush_workqueue()/flush_work(). In the future we might need to apply some gcc attribute or sparse annotation to the function passed to INIT_WORK(). Paul's ack below is for this RCU usage. (Includes fixes by Alan Cox , David L Stevens , Chris Wright ) Acked-by: Rusty Russell Acked-by: Arnd Bergmann Acked-by: "Paul E. McKenney" Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/miscdevice.h | 1 + include/linux/vhost.h | 130 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 include/linux/vhost.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 756f831cbdd5..d93080748a91 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -362,6 +362,7 @@ unifdef-y += uio.h unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h +unifdef-y += vhost.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index adaf3c15e449..8b5f7cc0fba6 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,6 +30,7 @@ #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 +#define VHOST_NET_MINOR 233 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/vhost.h b/include/linux/vhost.h new file mode 100644 index 000000000000..e847f1e30756 --- /dev/null +++ b/include/linux/vhost.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif -- cgit v1.2.3 From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Fri, 15 Jan 2010 17:01:02 -0800 Subject: kfifo: fix kfifo_out_locked race bug Fix a wrong optimization in include/linux/kfifo.h which could cause a race in kfifo_out_locked. Signed-off-by: Stefani Seibold Reported-by: Johan Hovold Cc: Pete Zaitcev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7c6b32a1421c..c4ac88b3c302 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, ret = kfifo_out(fifo, to, n); - /* - * optimization: if the FIFO is empty, set the indices to 0 - * so we don't wrap the next time - */ - if (kfifo_is_empty(fifo)) - kfifo_reset(fifo); - spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.3 From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jan 2010 17:01:11 -0800 Subject: tty.h: make tty_port_get() static inline I get a few dozen of these warnings when using gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2): In file included from mmotm-2010-0113-1217/init/do_mounts.c:5: mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get': mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static so make the function static inline. [akpm@linux-foundation.org: may as well convert tty_port_users() also] Signed-off-by: Randy Dunlap Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index ef3a2947b102..6abfcf5b5887 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_put(struct tty_port *port); -extern inline struct tty_port *tty_port_get(struct tty_port *port) +static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port) kref_get(&port->kref); @@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); -extern inline int tty_port_users(struct tty_port *port) +static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } -- cgit v1.2.3 From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:12 -0800 Subject: kfifo: use void * pointers for user buffers The pointers to user buffers are currently unsigned char *, which requires a lot of casting in the caller for any non-char typed buffers. Use void * instead. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c4ac88b3c302..6fb495ea956a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -104,15 +104,15 @@ union { \ #undef __kfifo_initializer -extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, +extern void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); extern unsigned int kfifo_in(struct kfifo *fifo, - const unsigned char *from, unsigned int len); + const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, - unsigned char *to, unsigned int len); + void *to, unsigned int len); /** * kfifo_reset - removes the entire FIFO contents @@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo) * bytes copied. */ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, - const unsigned char *from, unsigned int n, spinlock_t *lock) + const void *from, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; @@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, * @to buffer and returns the number of copied bytes. */ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, - unsigned char *to, unsigned int n, spinlock_t *lock) + void *to, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; -- cgit v1.2.3 From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:15 -0800 Subject: kfifo: sanitize *_user error handling Right now for kfifo_*_user it's not easily possible to distingush between a user copy failing and the FIFO not containing enough data. The problem is that both conditions are multiplexed into the same return code. Avoid this by moving the "copy length" into a separate output parameter and only return 0/-EFAULT in the main return value. I didn't fully adapt the weird "record" variants, those seem to be unused anyways and were rather messy (should they be just removed?) I would appreciate some double checking if I did all the conversions correctly. Signed-off-by: Andi Kleen Cc: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6fb495ea956a..86ad50a900c8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, extern void kfifo_skip(struct kfifo *fifo, unsigned int len); -extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int n); +extern __must_check int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned *lenout); -extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int n); +extern __must_check int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned *lenout); /* * __kfifo_add_out internal helper function for updating the out offset -- cgit v1.2.3 From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:16 -0800 Subject: kfifo: add kfifo_out_peek In some upcoming code it's useful to peek into a FIFO without permanentely removing data. This patch implements a new kfifo_out_peek() to do this. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86ad50a900c8..7ad6d32dd673 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo, const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len); +extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, + void *to, unsigned int len, unsigned offset); + /** * kfifo_reset - removes the entire FIFO contents -- cgit v1.2.3 From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: add kfifo_initialized Simple inline that checks if kfifo_init() has been executed on a fifo. This is useful for walking all per CPU fifos, when some of them might not have been brought up yet. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7ad6d32dd673..c8618243ca5a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo, extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, unsigned offset); +/** + * kfifo_initialized - Check if kfifo is initialized. + * @fifo: fifo to check + * Return %true if FIFO is initialized, otherwise %false. + * Assumes the fifo was 0 before. + */ +static inline bool kfifo_initialized(struct kfifo *fifo) +{ + return fifo->buffer != 0; +} /** * kfifo_reset - removes the entire FIFO contents -- cgit v1.2.3 From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: document everywhere that size has to be power of two On my first try using them I missed that the fifos need to be power of two, resulting in a runtime bug. Document that requirement everywhere (and fix one grammar bug) Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c8618243ca5a..6f6c5f300af6 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -67,7 +67,7 @@ struct kfifo { /** * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used inside struct or union declaration * Note2: the macro creates two objects: @@ -91,7 +91,7 @@ union { \ /** * DEFINE_KFIFO - macro to define and initialize a kfifo * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used for global and local kfifo data type variables * Note2: the macro creates two objects: -- cgit v1.2.3 From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 15 Jan 2010 17:01:22 -0800 Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2() Add BUILD_BUG_ON_NOT_POWER_OF_2() When code relies on a constant being a power of 2: #define FOO 512 /* must be a power of 2 */ it would be nice to be able to do: BUILD_BUG_ON(!is_power_of_2(FOO)); However applying an inline function does not result in a compile-time constant that can be used with BUILD_BUG_ON(), so trying that gives results in: error: bit-field '' width not an integer constant As suggested by akpm, rather than monkeying around with is_power_of_2() and risking gcc warts about constant expressions, just create a macro BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement. Signed-off-by: Roland Dreier Cc: Bart Van Assche Cc: David Dillow Cc: "Robert P. J. Day" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fc9f5aab5f8..328bca609b9b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -734,6 +734,10 @@ struct sysinfo { /* Force a compilation error if condition is constant and true */ #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) +/* Force a compilation error if a constant expression is not a power of 2 */ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + /* Force a compilation error if condition is true, but also produce a result (of value 0 and type size_t), so the expression can be used e.g. in a structure initializer (or where-ever else comma expressions -- cgit v1.2.3 From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:33 -0800 Subject: nommu: struct vm_region's vm_usage count need not be atomic The vm_usage count field in struct vm_region does not need to be atomic as it's only even modified whilst nommu_region_sem is write locked. Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84d020bed083..80cfa78a8cf6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -122,7 +122,7 @@ struct vm_region { unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ - atomic_t vm_usage; /* region usage count */ + int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; -- cgit v1.2.3 From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:35 -0800 Subject: nommu: don't need get_unmapped_area() for NOMMU get_unmapped_area() is unnecessary for NOMMU as no-one calls it. Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 80cfa78a8cf6..36f96271306c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -205,10 +205,12 @@ struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; struct vm_area_struct * mmap_cache; /* last find_vma result */ +#ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); +#endif unsigned long mmap_base; /* base of mmap area */ unsigned long task_size; /* size of task vm space */ unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d4991be9d53..6f7bba93929b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -377,6 +377,8 @@ extern int sysctl_max_map_count; #include +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags); extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif #if USE_SPLIT_PTLOCKS /* @@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -extern void arch_pick_mmap_layout(struct mm_struct *mm); - #ifdef CONFIG_TRACING extern void __trace_special(void *__tr, void *__data, -- cgit v1.2.3 From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:39 -0800 Subject: nommu: fix shared mmap after truncate shrinkage problems Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen over the end of a truncation. The problem is that ramfs_nommu_check_mappings() checks that the reduced file size against the VMA tree, but not the vm_region tree. The following sequence of events can cause the problem: fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600); ftruncate(fd, 32 * 1024); a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); munmap(a, 32 * 1024); ftruncate(fd, 16 * 1024); c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'a' creates a vm_region covering 32KB of the file. Mapping 'b' sees that the vm_region from 'a' is covering the region it wants and so shares it, pinning it in memory. Mapping 'a' then goes away and the file is truncated to the end of VMA 'b'. However, the region allocated by 'a' is still in effect, and has _not_ been reduced. Mapping 'c' is then created, and because there's a vm_region covering the desired region, get_unmapped_area() is _not_ called to repeat the check, and the mapping is granted, even though the pages from the latter half of the mapping have been discarded. However: d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'd' should work, and should end up sharing the region allocated by 'a'. To deal with this, we shrink the vm_region struct during the truncation, lest do_mmap_pgoff() take it as licence to share the full region automatically without calling the get_unmapped_area() file op again. Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47a..60c467bfbabd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; +extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); -- cgit v1.2.3 From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Jan 2010 18:05:00 +0000 Subject: mfd: Correct WM835x ISINK ramp time defines The constants used to specify ISINK ramp times for WM835x had the wrong shifts so that the on times applied to the off ramp and vice versa. The masks for the bitfields are correct. Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index be3264e286e0..e786fe9841ef 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -666,20 +666,20 @@ #define WM8350_ISINK_FLASH_DUR_64MS (1 << 8) #define WM8350_ISINK_FLASH_DUR_96MS (2 << 8) #define WM8350_ISINK_FLASH_DUR_1024MS (3 << 8) -#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 4) -#define WM8350_ISINK_FLASH_ON_0_25S (1 << 4) -#define WM8350_ISINK_FLASH_ON_0_50S (2 << 4) -#define WM8350_ISINK_FLASH_ON_1_00S (3 << 4) -#define WM8350_ISINK_FLASH_ON_1_95S (1 << 4) -#define WM8350_ISINK_FLASH_ON_3_91S (2 << 4) -#define WM8350_ISINK_FLASH_ON_7_80S (3 << 4) -#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 0) -#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 0) -#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 0) +#define WM8350_ISINK_FLASH_ON_0_25S (1 << 0) +#define WM8350_ISINK_FLASH_ON_0_50S (2 << 0) +#define WM8350_ISINK_FLASH_ON_1_00S (3 << 0) +#define WM8350_ISINK_FLASH_ON_1_95S (1 << 0) +#define WM8350_ISINK_FLASH_ON_3_91S (2 << 0) +#define WM8350_ISINK_FLASH_ON_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 4) +#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 4) +#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 4) /* * Regulator Interrupts. -- cgit v1.2.3 From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Wed, 6 Jan 2010 13:51:48 +0100 Subject: mfd: tmio_mmc hardware abstraction for CNF area This patch abstracts out the CNF area code from tmio_mmc which is not present in all hardware that can use this driver. This is required so that we can support non-toshiba based hardware. ASIC3 support by Philipp Zabel Signed-off-by: Ian Molton Signed-off-by: Magnus Damm Signed-off-by: Samuel Ortiz --- include/linux/mfd/tmio.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 6b9c5d06690c..9cb1834deffa 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -2,6 +2,8 @@ #define MFD_TMIO_H #include +#include +#include #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) @@ -18,11 +20,48 @@ writew((val) >> 16, (addr) + 2); \ } while (0) +#define CNF_CMD 0x04 +#define CNF_CTL_BASE 0x10 +#define CNF_INT_PIN 0x3d +#define CNF_STOP_CLK_CTL 0x40 +#define CNF_GCLK_CTL 0x41 +#define CNF_SD_CLK_MODE 0x42 +#define CNF_PIN_STATUS 0x44 +#define CNF_PWR_CTL_1 0x48 +#define CNF_PWR_CTL_2 0x49 +#define CNF_PWR_CTL_3 0x4a +#define CNF_CARD_DETECT_MODE 0x4c +#define CNF_SD_SLOT 0x50 +#define CNF_EXT_GCLK_CTL_1 0xf0 +#define CNF_EXT_GCLK_CTL_2 0xf1 +#define CNF_EXT_GCLK_CTL_3 0xf9 +#define CNF_SD_LED_EN_1 0xfa +#define CNF_SD_LED_EN_2 0xfe + +#define SDCREN 0x2 /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/ + +#define sd_config_write8(base, shift, reg, val) \ + tmio_iowrite8((val), (base) + ((reg) << (shift))) +#define sd_config_write16(base, shift, reg, val) \ + tmio_iowrite16((val), (base) + ((reg) << (shift))) +#define sd_config_write32(base, shift, reg, val) \ + do { \ + tmio_iowrite16((val), (base) + ((reg) << (shift))); \ + tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \ + } while (0) + +int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); +int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); +void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); +void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); + /* * data for the MMC controller */ struct tmio_mmc_data { const unsigned int hclk; + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); }; /* -- cgit v1.2.3 From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jan 2010 05:37:16 +0000 Subject: phylib: Move workqueue initialization to a proper place commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume: PM: resume of devices complete after 237.098 msecs Restarting tasks ... done. kernel BUG at kernel/workqueue.c:354! Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Backtrace: [] (__bug+0x0/0x2c) from [] (queue_delayed_work_on+0x74/0xf8) [] (queue_delayed_work_on+0x0/0xf8) from [] (queue_delayed_work+0x2c/0x30) The oops pops up because TI DaVinci EMAC driver detaches PHY on suspend and attaches it back on resume. Attaching makes phylib call phy_start_machine() that initializes a workqueue. On the other hand, PHY's resume routine will call phy_start_machine() again, and that will cause the oops since we just destroyed the already scheduled workqueue. This patch fixes the issue by moving workqueue initialization to phy_device_create(). p.s. We don't see this oops with ucc_geth and gianfar drivers because they perform a fine-grained suspend, i.e. they just stop the PHYs without detaching. Reported-by: Sekhar Nori Signed-off-by: Anton Vorontsov Tested-by: Sekhar Nori Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7968defd2fa7..6a7eb402165d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, void (*adjust_link)(struct net_device *)); +void phy_state_machine(struct work_struct *work); void phy_start_machine(struct phy_device *phydev, void (*handler)(struct net_device *)); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jan 2010 13:46:10 -0800 Subject: net: Unexport napi_gro_flush(). Nothing outside of net/core/dev.c uses it. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3fccc85b1a0..468a11dea58c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1527,7 +1527,6 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); -extern void napi_gro_flush(struct napi_struct *napi); extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); -- cgit v1.2.3 From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 6 Jan 2010 10:16:51 -0800 Subject: USB: Fix duplicate sysfs problem after device reset. Borislav Petkov reports issues with duplicate sysfs endpoint files after a resume from a hibernate. It turns out that the code to support alternate settings under xHCI has issues when a device with a non-default alternate setting is reset during the hibernate: [ 427.681810] Restarting tasks ... [ 427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000 [ 427.682019] usb usb3: usb resume [ 427.682030] ohci_hcd 0000:00:12.0: wakeup root hub [ 427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s [ 427.682205] usb 1-2: usb wakeup-resume [ 427.682226] usb 1-2: finish reset-resume [ 427.682886] done. [ 427.734658] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.746682] hub 3-0:1.0: hub_reset_resume [ 427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub [ 427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2 [ 427.839653] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS [ 427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1 [ 427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1 [ 427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed. [ 427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0 [ 427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1 [ 427.916609] ep_81: create, parent hub [ 427.916632] ------------[ cut here ]------------ [ 427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96() [ 427.916649] Hardware name: System Product Name [ 427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81' [ 427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat +8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core [ 427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13 [ 427.916699] Call Trace: The problem is caused by a mismatch between the USB core's view of the device state and the USB device and xHCI host's view of the device state. After the device reset and re-configuration, the device and the xHCI host think they are using alternate setting 0 of all interfaces. However, the USB core keeps track of the old state, which may include non-zero alternate settings. It uses intf->cur_altsetting to keep the endpoint sysfs files for the old state across the reset. The bandwidth allocation functions need to know what the xHCI host thinks the current alternate settings are, so original patch set intf->cur_altsetting to the alternate setting 0. This caused duplicate endpoint files to be created. The solution is to not set intf->cur_altsetting before calling usb_set_interface() in usb_reset_and_verify_device(). Instead, we add a new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use alternate setting 0 as the currently installed alternate setting. Signed-off-by: Sarah Sharp Tested-by: Borislav Petkov Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e101a2d04d75..d7ace1b80f09 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -192,6 +192,7 @@ struct usb_interface { unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned reset_running:1; + unsigned resetting_device:1; /* true: bandwidth alloc after reset */ struct device dev; /* interface specific device info */ struct device *usb_dev; -- cgit v1.2.3 From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 4 Jan 2010 14:44:56 +0100 Subject: sched: Fix vmark regression on big machines SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't enabled, leading to many cache misses on large machines as we traverse looking for an idle shared cache to wake to. Change the enabler of select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the sibling domain level. Reported-by: Lin Ming Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1262612696.15495.15.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 57e63579bfdd..5b81156780b1 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ int arch_update_cpu_topology(void); | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ - | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ | 0*SD_PREFER_SIBLING \ , \ -- cgit v1.2.3 From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Jan 2010 14:02:16 +0100 Subject: perf: Change the is_software_event() definition The is_software_event() definition always confuses me because its an exclusive expression, make it an inclusive one. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75eea..8fa71874113f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - return (event->attr.type != PERF_TYPE_RAW) && - (event->attr.type != PERF_TYPE_HARDWARE) && - (event->attr.type != PERF_TYPE_HW_CACHE); + switch (event->attr.type) { + case PERF_TYPE_SOFTWARE: + case PERF_TYPE_TRACEPOINT: + /* for now the breakpoint stuff also works as software event */ + case PERF_TYPE_BREAKPOINT: + return 1; + } + return 0; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -- cgit v1.2.3 From d0dd2de0d055f0ffb1e2ecdc21380de9d12a85e2 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Wed, 20 Jan 2010 13:55:06 +0200 Subject: mac80211: Account HT Control field in Data frame hdrlen according to 802.11n-2009 ieee80211_hdrlen() should account account new HT Control field in 802.11 data frame header introduced by IEEE 802.11n standard. According to 802.11n-2009 HT Control field is present in data frames when both of following are met: 1. It is QoS data frame. 2. Order bit is set in Frame Control field. The change might be totally compatible with legacy non-11n aware frames, because 802.11-2007 standard states that "all QoS STAs set this subfield to 0". Signed-off-by: Andriy V. Tkachuk Acked-by : Benoit Papillault Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 842701906ae9..19984958ab7b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -138,6 +138,8 @@ #define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 +#define IEEE80211_HT_CTL_LEN 4 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; -- cgit v1.2.3 From a271623f871dda970319ca15dfad3a8c8c36249f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:13:10 +0000 Subject: netdev: remove certain HAVE_ macros After netdev_ops compat code HAVE_* macros aren't needed, in fact they _will_ result in compile breakage for out of tree drivers. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 468a11dea58c..b5fb51d0b8b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -621,30 +621,21 @@ struct net_device_ops { struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); -#define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); -#define HAVE_SET_RX_MODE void (*ndo_set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST void (*ndo_set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR int (*ndo_set_mac_address)(struct net_device *dev, void *addr); -#define HAVE_VALIDATE_ADDR int (*ndo_validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); -#define HAVE_CHANGE_MTU int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); @@ -656,7 +647,6 @@ struct net_device_ops { void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER -#define HAVE_NETDEV_POLL void (*ndo_poll_controller)(struct net_device *dev); #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -- cgit v1.2.3 From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jan 2010 13:36:10 -0800 Subject: net: use helpers to access uc list V2 This patch introduces three macros to work with uc list from net drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5fb51d0b8b1..93a32a5ca74f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -263,6 +263,11 @@ struct netdev_hw_addr_list { int count; }; +#define netdev_uc_count(dev) ((dev)->uc.count) +#define netdev_uc_empty(dev) ((dev)->uc.count == 0) +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ -- cgit v1.2.3 From c85bb41e93184bf5494dde6d8fe5a81b564c84c8 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 2 Feb 2010 07:32:29 -0800 Subject: igmp: fix ip_mc_sf_allow race [v5] Almost all igmp functions accessing inet->mc_list are protected by rtnl_lock(), but there is one exception which is ip_mc_sf_allow(), so there is a chance of either ip_mc_drop_socket or ip_mc_leave_group remove an entry while ip_mc_sf_allow is running causing a crash. Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 724c27e5d173..93fc2449af10 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -153,6 +153,7 @@ extern int sysctl_igmp_max_msf; struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; + struct rcu_head rcu; __be32 sl_addr[0]; }; @@ -170,6 +171,7 @@ struct ip_mc_socklist { struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip_sf_list { -- cgit v1.2.3 From f9bfbebf34eab707b065116cdc9699d25ba4252a Mon Sep 17 00:00:00 2001 From: Shirley Ma Date: Fri, 29 Jan 2010 03:19:05 +0000 Subject: virtio: Add ability to detach unused buffers from vrings There's currently no way for a virtio driver to ask for unused buffers, so it has to keep a list itself to reclaim them at shutdown. This is redundant, since virtio_ring stores that information. So add a new hook to do this. Signed-off-by: Shirley Ma Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- include/linux/virtio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 057a2e010758..f508c651e53d 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,9 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * @detach_unused_buf: detach first unused buffer + * vq: the struct virtqueue we're talking about. + * Returns NULL or the "data" token handed to add_buf * * Locking rules are straightforward: the driver is responsible for * locking. No two operations may be invoked simultaneously, with the exception @@ -71,6 +74,7 @@ struct virtqueue_ops { void (*disable_cb)(struct virtqueue *vq); bool (*enable_cb)(struct virtqueue *vq); + void *(*detach_unused_buf)(struct virtqueue *vq); }; /** -- cgit v1.2.3 From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Tue, 2 Feb 2010 15:58:48 -0800 Subject: connector: Delete buggy notification code. On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote: > > There are at least two ways to fix it: using a big cannon and a small > > one. The former way is to disable notification registration, since it is > > not used by anyone at all. Second way is to check whether calling > > process is root and its destination group is -1 (kind of priveledged > > one) before command is dispatched to workqueue. > > Well if no one is using it, removing it makes the most sense, right? > > No objection from me, care to make up a patch either way for this? Getting it is not used, let's drop support for notifications about (un)registered events from connector. Another option was to check credentials on receiving, but we can always restore it without bugs if needed, but genetlink has a wider code base and none complained, that userspace can not get notification when some other clients were (un)registered. Kudos for Sebastian Krahmer , who found a bug in the code. Signed-off-by: Evgeniy Polyakov Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- include/linux/connector.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 72ba63eb83c5..3a779ffba60b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -24,9 +24,6 @@ #include -#define CN_IDX_CONNECTOR 0xffffffff -#define CN_VAL_CONNECTOR 0xffffffff - /* * Process Events connector unique ids -- used for message routing */ @@ -75,30 +72,6 @@ struct cn_msg { __u8 data[0]; }; -/* - * Notify structure - requests notification about - * registering/unregistering idx/val in range [first, first+range]. - */ -struct cn_notify_req { - __u32 first; - __u32 range; -}; - -/* - * Main notification control message - * *_notify_num - number of appropriate cn_notify_req structures after - * this struct. - * group - notification receiver's idx. - * len - total length of the attached data. - */ -struct cn_ctl_msg { - __u32 idx_notify_num; - __u32 val_notify_num; - __u32 group; - __u32 len; - __u8 data[0]; -}; - #ifdef __KERNEL__ #include @@ -151,11 +124,6 @@ struct cn_callback_entry { u32 seq, group; }; -struct cn_ctl_entry { - struct list_head notify_entry; - struct cn_ctl_msg *msg; -}; - struct cn_dev { struct cb_id id; -- cgit v1.2.3 From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:03 +0000 Subject: net: maintain namespace isolation between vlan and real device In the vlan and macvlan drivers, the start_xmit function forwards data to the dev_queue_xmit function for another device, which may potentially belong to a different namespace. To make sure that classification stays within a single namespace, this resets the potentially critical fields. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93a32a5ca74f..622ba5aa93c4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1004,6 +1004,15 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +#ifndef CONFIG_NET_NS +static inline void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; +} +#else /* CONFIG_NET_NS */ +void skb_set_dev(struct sk_buff *skb, struct net_device *dev); +#endif + static inline bool netdev_uses_trailer_tags(struct net_device *dev) { #ifdef CONFIG_NET_DSA_TAG_TRAILER -- cgit v1.2.3 From fc0663d6b5e6d8e9b57f872a644c0aafd82361b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:40 +0000 Subject: macvlan: allow multiple driver backends This makes it possible to hook into the macvlan driver from another kernel module. In particular, the goal is to extend it with the macvtap backend that provides a tun/tap compatible interface directly on the macvlan device. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 5f200bac3749..9a11544bb0b1 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -1,6 +1,76 @@ #ifndef _LINUX_IF_MACVLAN_H #define _LINUX_IF_MACVLAN_H +#include +#include +#include +#include +#include + +struct macvlan_port; +struct macvtap_queue; + +/** + * struct macvlan_rx_stats - MACVLAN percpu rx stats + * @rx_packets: number of received packets + * @rx_bytes: number of received bytes + * @multicast: number of received multicast packets + * @rx_errors: number of errors + */ +struct macvlan_rx_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long multicast; + unsigned long rx_errors; +}; + +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; + struct macvlan_rx_stats *rx_stats; + enum macvlan_mode mode; + int (*receive)(struct sk_buff *skb); + int (*forward)(struct net_device *dev, struct sk_buff *skb); +}; + +static inline void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast) +{ + struct macvlan_rx_stats *rx_stats; + + rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id()); + if (likely(success)) { + rx_stats->rx_packets++;; + rx_stats->rx_bytes += len; + if (multicast) + rx_stats->multicast++; + } else { + rx_stats->rx_errors++; + } +} + +extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + int (*receive)(struct sk_buff *skb), + int (*forward)(struct net_device *dev, + struct sk_buff *skb)); + +extern void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast); + +extern void macvlan_dellink(struct net_device *dev, struct list_head *head); + +extern int macvlan_link_register(struct rtnl_link_ops *ops); + +extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, + struct net_device *dev); + + extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); #endif /* _LINUX_IF_MACVLAN_H */ -- cgit v1.2.3 From 20d29d7a916a47bf533b5709437fe735b6b5b79e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:24:26 +0000 Subject: net: macvtap driver In order to use macvlan with qemu and other tools that require a tap file descriptor, the macvtap driver adds a small backend with a character device with the same interface as the tun driver, with a minimum set of features. Macvtap interfaces are created in the same way as macvlan interfaces using ip link, but the netif is just used as a handle for configuration and accounting, while the data goes through the chardev. Each macvtap interface has its own character device, simplifying permission management significantly over the generic tun/tap driver. Cc: Patrick McHardy Cc: Stephen Hemminger Cc: David S. Miller" Cc: "Michael S. Tsirkin" Cc: Herbert Xu Cc: Or Gerlitz Cc: netdev@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9a11544bb0b1..51f1512045e9 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -34,6 +34,7 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); + struct macvtap_queue *tap; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3 From 1621e0940294c20e302faf401f41204de7252e22 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 1 Feb 2010 09:44:19 +0000 Subject: net: CONFIG_COMPAT redux Ifdef out struct proto_ops::compat_ioctl struct proto_ops::compat_setsockopt struct proto_ops::compat_getsockopt to make structures smaller on COMPAT=n kernels. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 5e8083cacc8b..4157b5d42bd6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -174,18 +174,22 @@ struct proto_ops { struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#ifdef CONFIG_COMPAT int (*compat_ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#endif int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); int (*recvmsg) (struct kiocb *iocb, struct socket *sock, -- cgit v1.2.3 From 6683ece36e3531fc8c75f69e7165c5f20930be88 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:22:25 -0800 Subject: net: use helpers to access mc list V2 This patch introduces the similar helpers as those already done for uc list. However multicast lists are no list_head lists but "mademanually". The three macros added by this patch will make the transition of mc_list to list_head smooth in two steps: 1) convert all drivers to use these macros (with the original iterator of type "struct dev_mc_list") 2) once all drivers are converted, convert list type and iterators to "struct netdev_hw_addr" in one patch. >From now on, drivers can (and should) use "netdev_for_each_mc_addr" to iterate over the addresses with iterator of type "struct netdev_hw_addr". Also macros "netdev_mc_count" and "netdev_mc_empty" to read list's length. This is the state which should be reached in all drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 622ba5aa93c4..e535700a3b72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,12 @@ struct netdev_hw_addr_list { #define netdev_for_each_uc_addr(ha, dev) \ list_for_each_entry(ha, &dev->uc.list, list) +#define netdev_mc_count(dev) ((dev)->mc_count) +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) + +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ -- cgit v1.2.3 From f8f76db1db369f3a130ac3fd33e2eee5f1610d9c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:23:02 -0800 Subject: libphy: add phy_find_first function Many drivers do this in them manually. Now they can use this function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 6a7eb402165d..14d7fdf6a90a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -452,6 +452,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, const char *bus_id, u32 flags, phy_interface_t interface); +struct phy_device *phy_find_first(struct mii_bus *bus); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, void (*handler)(struct net_device *), u32 flags, phy_interface_t interface); -- cgit v1.2.3 From bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 4 Feb 2010 20:24:10 -0800 Subject: packet: Add GSO/csum offload support. This patch adds GSO/checksum offload to af_packet sockets using virtio_net_hdr. Based on Rusty's patch to add this support to tun. It allows GSO/checksum offload to be enabled when using raw socket backend with virtio_net. Adds PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the receive path and process/skip virtio_net_hdr in the send path. This option is only allowed with SOCK_RAW sockets attached to ethernet type devices. v2 updates ---------- Michael's Comments - Perform length check in packet_snd() when GSO is off even when vnet_hdr is present. - Check for SKB_GSO_FCOE type and return -EINVAL - don't allow tx/rx ring when vnet_hdr is enabled. Herbert's Comments - Removed ethernet specific code. - protocol value is assumed to be passed in by the caller. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 4021d47cc437..aa57a5f993fc 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -46,6 +46,7 @@ struct sockaddr_ll { #define PACKET_RESERVE 12 #define PACKET_TX_RING 13 #define PACKET_LOSS 14 +#define PACKET_VNET_HDR 15 struct tpacket_stats { unsigned int tp_packets; -- cgit v1.2.3