From 182d919b84902eece162c63ed3d476c8016b4197 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Feb 2015 23:47:31 +0000 Subject: FS-Cache: Count culled objects and objects rejected due to lack of space Count the number of objects that get culled by the cache backend and the number of objects that the cache backend declines to instantiate due to lack of space in the cache. These numbers are made available through /proc/fs/fscache/stats Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 771484993ca7..c9dafdaf3347 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -371,6 +371,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_LOOKED_UP 4 /* T if object has been looked up */ #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ +#define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ @@ -551,4 +552,15 @@ extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, const void *data, uint16_t datalen); +extern void fscache_object_retrying_stale(struct fscache_object *object); + +enum fscache_why_object_killed { + FSCACHE_OBJECT_IS_STALE, + FSCACHE_OBJECT_NO_SPACE, + FSCACHE_OBJECT_WAS_RETIRED, + FSCACHE_OBJECT_WAS_CULLED, +}; +extern void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + #endif /* _LINUX_FSCACHE_CACHE_H */ -- cgit v1.2.3 From 30ceec6284129662efc3a1e7675b2bd857a046fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:27 +0000 Subject: FS-Cache: When submitting an op, cancel it if the target object is dying When submitting an operation, prefer to cancel the operation immediately rather than queuing it for later processing if the object is marked as dying (ie. the object state machine has reached the KILL_OBJECT state). Whilst we're at it, change the series of related test_bit() calls into a READ_ONCE() and bitwise-AND operators to reduce the number of load instructions (test_bit() has a volatile address). Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index c9dafdaf3347..2e83a141e465 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -411,17 +411,22 @@ static inline bool fscache_object_is_available(struct fscache_object *object) return test_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); } +static inline bool fscache_cache_is_broken(struct fscache_object *object) +{ + return test_bit(FSCACHE_IOERROR, &object->cache->flags); +} + static inline bool fscache_object_is_active(struct fscache_object *object) { return fscache_object_is_available(object) && fscache_object_is_live(object) && - !test_bit(FSCACHE_IOERROR, &object->cache->flags); + !fscache_cache_is_broken(object); } static inline bool fscache_object_is_dead(struct fscache_object *object) { return fscache_object_is_dying(object) && - test_bit(FSCACHE_IOERROR, &object->cache->flags); + fscache_cache_is_broken(object); } /** -- cgit v1.2.3 From 87021526300f1a292dd966e141e183630ac95317 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:52:51 +0000 Subject: FS-Cache: fscache_object_is_dead() has wrong logic, kill it fscache_object_is_dead() returns true only if the object is marked dead and the cache got an I/O error. This should be a logical OR instead. Since two of the callers got split up into handling for separate subcases, expand the other callers and kill the function. This is probably the right thing to do anyway since one of the subcases isn't about the object at all, but rather about the cache. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 2e83a141e465..ca3d550da11e 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -423,12 +423,6 @@ static inline bool fscache_object_is_active(struct fscache_object *object) !fscache_cache_is_broken(object); } -static inline bool fscache_object_is_dead(struct fscache_object *object) -{ - return fscache_object_is_dying(object) && - fscache_cache_is_broken(object); -} - /** * fscache_object_destroyed - Note destruction of an object in a cache * @cache: The cache from which the object came -- cgit v1.2.3 From 1339ec98e32b4bc8efb6fbb71c006a465130aaba Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Feb 2015 13:26:39 +0000 Subject: FS-Cache: Out of line fscache_operation_init() Out of line fscache_operation_init() so that it can access internal FS-Cache features, such as stats, in a later commit. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ca3d550da11e..0e26d49972e3 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -119,27 +119,9 @@ extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); - -/** - * fscache_operation_init - Do basic initialisation of an operation - * @op: The operation to initialise - * @release: The release function to assign - * - * Do basic initialisation of an operation. The caller must still set flags, - * object and processor if needed. - */ -static inline void fscache_operation_init(struct fscache_operation *op, - fscache_operation_processor_t processor, - fscache_operation_release_t release) -{ - INIT_WORK(&op->work, fscache_op_work_func); - atomic_set(&op->usage, 1); - op->state = FSCACHE_OP_ST_INITIALISED; - op->debug_id = atomic_inc_return(&fscache_op_debug_id); - op->processor = processor; - op->release = release; - INIT_LIST_HEAD(&op->pend_link); -} +extern void fscache_operation_init(struct fscache_operation *, + fscache_operation_processor_t, + fscache_operation_release_t); /* * data read operation -- cgit v1.2.3 From d3b97ca4a99e4e6c78f5a21c968eadf5c8ba9971 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: FS-Cache: The operation cancellation method needs calling in more places Any time an incomplete operation is cancelled, the operation cancellation function needs to be called to clean up. This is currently being passed directly to some of the functions that might want to call it, but not all. Instead, pass the cancellation method pointer to the fscache_operation_init() and have that cache it in the operation struct. Further, plug in a dummy cancellation handler if the caller declines to set one as this allows us to call the function unconditionally (the extra overhead isn't worth bothering about as we don't expect to be calling this typically). The cancellation method must thence be called everywhere the CANCELLED state is set. Note that we call it *before* setting the CANCELLED state such that the method can use the old state value to guide its operation. fscache_do_cancel_retrieval() needs moving higher up in the sources so that the init function can use it now. Without this, the following oops may be seen: FS-Cache: Assertion failed FS-Cache: 3 == 0 is false ------------[ cut here ]------------ kernel BUG at ../fs/fscache/page.c:261! ... RIP: 0010:[] fscache_release_retrieval_op+0x77/0x100 [] fscache_put_operation+0x114/0x2da [] __fscache_read_or_alloc_pages+0x358/0x3b3 [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 The assertion is showing that the remaining number of pages (n_pages) is not 0 when the operation is being released. Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 0e26d49972e3..69c6eb10d858 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -74,6 +74,7 @@ extern wait_queue_head_t fscache_cache_cleared_wq; */ typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +typedef void (*fscache_operation_cancel_t)(struct fscache_operation *op); enum fscache_operation_state { FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ @@ -109,6 +110,9 @@ struct fscache_operation { * the op in a non-pool thread */ fscache_operation_processor_t processor; + /* Operation cancellation cleanup (optional) */ + fscache_operation_cancel_t cancel; + /* operation releaser */ fscache_operation_release_t release; }; @@ -121,6 +125,7 @@ extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); extern void fscache_operation_init(struct fscache_operation *, fscache_operation_processor_t, + fscache_operation_cancel_t, fscache_operation_release_t); /* -- cgit v1.2.3 From 4a47132ff472a0c2c5441baeb50cf97f2580bc43 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Feb 2015 10:05:29 +0000 Subject: FS-Cache: Retain the netfs context in the retrieval op earlier Now that the retrieval operation may be disposed of by fscache_put_operation() before we actually set the context, the retrieval-specific cleanup operation can produce a NULL-pointer dereference when it tries to unconditionally clean up the netfs context. Given that it is expected that we'll get at least as far as the place where we currently set the context pointer and it is unlikely we'll go through the error handling paths prior to that point, retain the context right from the point that the retrieval op is allocated. Concomitant to this, we need to retain the cookie pointer in the retrieval op also so that we can call the netfs to release its context in the release method. In addition, we might now get into fscache_release_retrieval_op() with the op only initialised. To this end, set the operation to DEAD only after the release method has been called and skip the n_pages test upon cleanup if the op is still in the INITIALISED state. Without these changes, the following oops might be seen: BUG: unable to handle kernel NULL pointer dereference at 00000000000000b8 ... RIP: 0010:[] fscache_release_retrieval_op+0xae/0x100 ... Call Trace: [] fscache_put_operation+0x117/0x2e0 [] __fscache_read_or_alloc_pages+0x351/0x3ac [] __nfs_readpages_from_fscache+0x59/0xbf [nfs] [] nfs_readpages+0x10c/0x185 [nfs] [] ? alloc_pages_current+0x119/0x13e [] ? __page_cache_alloc+0xfb/0x10a [] __do_page_cache_readahead+0x188/0x22c [] ondemand_readahead+0x29e/0x2af [] page_cache_sync_readahead+0x38/0x3a [] generic_file_read_iter+0x1a2/0x55a [] ? nfs_revalidate_mapping+0xd6/0x288 [nfs] [] nfs_file_read+0x49/0x70 [nfs] [] new_sync_read+0x78/0x9c [] __vfs_read+0x13/0x38 [] vfs_read+0x95/0x121 [] SyS_read+0x4c/0x8a [] system_call_fastpath+0x12/0x17 Signed-off-by: David Howells Reviewed-by: Steve Dickson Acked-by: Jeff Layton --- include/linux/fscache-cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 69c6eb10d858..604e1526cd00 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -133,6 +133,7 @@ extern void fscache_operation_init(struct fscache_operation *, */ struct fscache_retrieval { struct fscache_operation op; + struct fscache_cookie *cookie; /* The netfs cookie */ struct address_space *mapping; /* netfs pages */ fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ void *context; /* netfs read context (pinned) */ -- cgit v1.2.3 From fb7737e949e31d8a71acee6bbb670f32dbd2a2c0 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 4 Mar 2015 20:01:14 -0600 Subject: hwspinlock/core: add device tree support This patch adds a new OF-friendly API of_hwspin_lock_get_id() for hwspinlock clients to use/request locks from a hwspinlock device instantiated through a device-tree blob. This new API can be used by hwspinlock clients to get the id for a specific lock using the phandle + args specifier, so that it can be requested using the available hwspin_lock_request_specific() API. Signed-off-by: Suman Anna Reviewed-by: Bjorn Andersson [small comment clarification] Signed-off-by: Ohad Ben-Cohen --- include/linux/hwspinlock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index 3343298e40e8..859d673d98c8 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -26,6 +26,7 @@ #define HWLOCK_IRQ 0x02 /* Disable interrupts, don't save state */ struct device; +struct device_node; struct hwspinlock; struct hwspinlock_device; struct hwspinlock_ops; @@ -66,6 +67,7 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank); struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); +int of_hwspin_lock_get_id(struct device_node *np, int index); int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); @@ -120,6 +122,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int of_hwspin_lock_get_id(struct device_node *np, int index) +{ + return 0; +} + static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) { return 0; -- cgit v1.2.3 From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 23:22:29 -0500 Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace Fresh mounts of proc and sysfs are a very special case that works very much like a bind mount. Unfortunately the current structure can not preserve the MNT_LOCK... mount flags. Therefore refactor the logic into a form that can be modified to preserve those lock bits. Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount of the filesystem be fully visible in the current mount namespace, before the filesystem may be mounted. Move the logic for calling fs_fully_visible from proc and sysfs into fs/namespace.c where it has greater access to mount namespace state. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..2d24eeb8e59c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1897,6 +1897,7 @@ struct file_system_type { #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ +#define FS_USERNS_VISIBLE 32 /* FS must already be visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); @@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -extern bool fs_fully_visible(struct file_system_type *); extern int current_umask(void); -- cgit v1.2.3 From 10081fb532a2a2216b7d8e4ad585c985075b6f60 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 1 May 2015 15:23:50 +0900 Subject: lib: introduce crc_t10dif_update() This introduces crc_t10dif_update() which enables to calculate CRC for a block which straddles multiple SG elements by calling multiple times. This also converts crc_t10dif() to use crc_t10dif_update() as they are almost same. (remove extra function call in crc_t10dif() and crc_t10dif_update - Tim + Herbert) Signed-off-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Tim Chen Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Nicholas Bellinger Cc: Sagi Grimberg Cc: "Martin K. Petersen" Cc: Christoph Hellwig Cc: "James E.J. Bottomley" Cc: target-devel@vger.kernel.org Signed-off-by: Nicholas Bellinger --- include/linux/crc-t10dif.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index cf53d0773ce3..d81961e9e37d 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -9,5 +9,6 @@ extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); extern __u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); #endif -- cgit v1.2.3 From cf561f0d2eb74574ad9985a2feab134267a9d298 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:27 +0200 Subject: virtio: introduce virtio_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/virtio_config.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 1e306f727edc..170947eb11b5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -205,35 +205,40 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) return 0; } +static inline bool virtio_is_little_endian(struct virtio_device *vdev) +{ + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); +} + /* Memory accessors */ static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val) { - return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio16_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val) { - return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio16(virtio_is_little_endian(vdev), val); } static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val) { - return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio32_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val) { - return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio32(virtio_is_little_endian(vdev), val); } static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val) { - return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __virtio64_to_cpu(virtio_is_little_endian(vdev), val); } static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) { - return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val); + return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } /* Config space accessors. */ -- cgit v1.2.3 From 5da7b160b36a42f161980c5e20d3960d6d076fb8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:24:58 +0200 Subject: vringh: introduce vringh_is_little_endian() helper Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Acked-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/vringh.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index a3fa537e717a..3ed62efc2bc5 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -226,33 +226,38 @@ static inline void vringh_notify(struct vringh *vrh) vrh->notify(vrh); } +static inline bool vringh_is_little_endian(const struct vringh *vrh) +{ + return vrh->little_endian; +} + static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) { - return __virtio16_to_cpu(vrh->little_endian, val); + return __virtio16_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val) { - return __cpu_to_virtio16(vrh->little_endian, val); + return __cpu_to_virtio16(vringh_is_little_endian(vrh), val); } static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val) { - return __virtio32_to_cpu(vrh->little_endian, val); + return __virtio32_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val) { - return __cpu_to_virtio32(vrh->little_endian, val); + return __cpu_to_virtio32(vringh_is_little_endian(vrh), val); } static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val) { - return __virtio64_to_cpu(vrh->little_endian, val); + return __virtio64_to_cpu(vringh_is_little_endian(vrh), val); } static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val) { - return __cpu_to_virtio64(vrh->little_endian, val); + return __cpu_to_virtio64(vringh_is_little_endian(vrh), val); } #endif /* _LINUX_VRINGH_H */ -- cgit v1.2.3 From 7d82410950aa74adccf035c332e409af2bb93e92 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 24 Apr 2015 14:26:24 +0200 Subject: virtio: add explicit big-endian support to memory accessors The current memory accessors logic is: - little endian if little_endian - native endian (i.e. no byteswap) if !little_endian If we want to fully support cross-endian vhost, we also need to be able to convert to big endian. Instead of changing the little_endian argument to some 3-value enum, this patch changes the logic to: - little endian if little_endian - big endian if !little_endian The native endian case is handled by all users with a trivial helper. This patch doesn't change any functionality, nor it does add overhead. Signed-off-by: Greg Kurz Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: David Gibson --- include/linux/virtio_byteorder.h | 24 ++++++++++++++---------- include/linux/virtio_config.h | 3 ++- include/linux/vringh.h | 3 ++- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h index 51865d05b267..ce63a2c3a612 100644 --- a/include/linux/virtio_byteorder.h +++ b/include/linux/virtio_byteorder.h @@ -3,17 +3,21 @@ #include #include -/* - * Low-level memory accessors for handling virtio in modern little endian and in - * compatibility native endian format. - */ +static inline bool virtio_legacy_is_little_endian(void) +{ +#ifdef __LITTLE_ENDIAN + return true; +#else + return false; +#endif +} static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val) { if (little_endian) return le16_to_cpu((__force __le16)val); else - return (__force u16)val; + return be16_to_cpu((__force __be16)val); } static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) @@ -21,7 +25,7 @@ static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val) if (little_endian) return (__force __virtio16)cpu_to_le16(val); else - return (__force __virtio16)val; + return (__force __virtio16)cpu_to_be16(val); } static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) @@ -29,7 +33,7 @@ static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val) if (little_endian) return le32_to_cpu((__force __le32)val); else - return (__force u32)val; + return be32_to_cpu((__force __be32)val); } static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) @@ -37,7 +41,7 @@ static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val) if (little_endian) return (__force __virtio32)cpu_to_le32(val); else - return (__force __virtio32)val; + return (__force __virtio32)cpu_to_be32(val); } static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) @@ -45,7 +49,7 @@ static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val) if (little_endian) return le64_to_cpu((__force __le64)val); else - return (__force u64)val; + return be64_to_cpu((__force __be64)val); } static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) @@ -53,7 +57,7 @@ static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val) if (little_endian) return (__force __virtio64)cpu_to_le64(val); else - return (__force __virtio64)val; + return (__force __virtio64)cpu_to_be64(val); } #endif /* _LINUX_VIRTIO_BYTEORDER */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 170947eb11b5..e5ce8ab0b8b0 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -207,7 +207,8 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu) static inline bool virtio_is_little_endian(struct virtio_device *vdev) { - return virtio_has_feature(vdev, VIRTIO_F_VERSION_1); + return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || + virtio_legacy_is_little_endian(); } /* Memory accessors */ diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 3ed62efc2bc5..bc6c28d04263 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -228,7 +228,8 @@ static inline void vringh_notify(struct vringh *vrh) static inline bool vringh_is_little_endian(const struct vringh *vrh) { - return vrh->little_endian; + return vrh->little_endian || + virtio_legacy_is_little_endian(); } static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val) -- cgit v1.2.3 From 632dda833e28fe43049a01b4bc53e409176e7843 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:04:50 -0400 Subject: SUNRPC: Clean up bc_send() Clean up: Merge bc_send() into bc_svc_process(). Note: even thought this touches svc.c, it is a client-side change. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); /* * Determine if a shared backchannel is in use -- cgit v1.2.3 From 0f41979164a52a1ca0f0a601f90000fc18e3a396 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 22:59:08 -0400 Subject: SUNRPC: Remove unused argument 'tk_ops' in rpc_run_bc_task Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..2aa68976a482 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -205,8 +205,7 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); void rpc_exit_task(struct rpc_task *); -- cgit v1.2.3 From 0d2a970d0ae55086520e1e58e572a7acd519429c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 15:37:10 -0400 Subject: SUNRPC: Fix a backchannel race We need to allow the server to send a new request immediately after we've replied to the previous one. Right now, there is a window between the send and the release of the old request in rpc_put_task(), where the server could send us a new backchannel RPC call, and we have no request to service it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..bc9c39d6d30d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -212,7 +212,8 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - unsigned int bc_alloc_count; /* Total number of preallocs */ + int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_free_slots; spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated -- cgit v1.2.3 From b1999477ed91c3c33891acfe0e18a4457e5c4915 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Tue, 14 Apr 2015 11:55:55 +0000 Subject: ARM: prima2: move to use REGMAP APIs for rtciobrg all devices behind rtciobrg needs a special way to access. currently they are using a platform-specific API. this patch moves to REGMAP, then clients can use regmap APIs to read/write. for the moment, old APIs are still kept, once all clients move to regmap, old APIs will be dropped. this patch also does minor clean for comments, authors statement. Signed-off-by: Guo Zeng Signed-off-by: Barry Song --- include/linux/rtc/sirfsoc_rtciobrg.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h index 2c92e1c8e055..aefd997262e4 100644 --- a/include/linux/rtc/sirfsoc_rtciobrg.h +++ b/include/linux/rtc/sirfsoc_rtciobrg.h @@ -9,10 +9,14 @@ #ifndef _SIRFSOC_RTC_IOBRG_H_ #define _SIRFSOC_RTC_IOBRG_H_ +struct regmap_config; + extern void sirfsoc_rtc_iobrg_besyncing(void); extern u32 sirfsoc_rtc_iobrg_readl(u32 addr); extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr); +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config); #endif -- cgit v1.2.3 From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2aa68976a482..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -268,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ -- cgit v1.2.3 From 8e2281330f9930bccf77cf04027ec60b6cc0fb34 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:26 -0400 Subject: sunrpc: make xprt->swapper an atomic_t Split xs_swapper into enable/disable functions and eliminate the "enable" flag. Currently, it's racy if you have multiple swapon/swapoff operations running in parallel over the same xprt. Also fix it so that we only set it to a memalloc socket on a 0->1 transition and only clear it on a 1->0 transition. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc9c39d6d30d..9a3308703c15 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -180,7 +180,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -346,7 +346,8 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); +int xs_swapper_enable(struct rpc_xprt *xprt); +void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); -- cgit v1.2.3 From d67fa4d85a2143b46052b2e9ccc6749a4c97b2de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:29 -0400 Subject: sunrpc: turn swapper_enable/disable functions into rpc_xprt_ops RDMA xprts don't have a sock_xprt, but an rdma_xprt, so the xs_swapper_enable/disable functions will likely oops when fed an RDMA xprt. Turn these functions into rpc_xprt_ops so that that doesn't occur. For now the RDMA versions are no-ops that just return -EINVAL on an attempt to swapon. Cc: Chuck Lever Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9a3308703c15..b6096592b1d4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,8 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); }; /* @@ -328,6 +330,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -346,8 +360,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper_enable(struct rpc_xprt *xprt); -void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); -- cgit v1.2.3 From 11598b8ff2b97cf034d0c025cf125c92b574bafc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 10 Jun 2015 16:54:28 -0400 Subject: NFS: Remove unused nfs_rw_ops->rw_release() function This was only ever set to nfs_writeback_release_common(), a function which is completely empty. Let's just drop this function pointer and simplify the code a bit. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,7 +67,6 @@ struct nfs_rw_ops { const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); - void (*rw_release)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); -- cgit v1.2.3 From 4a06825839889cc1756d0dd8a52d6b1071ee0263 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:02:25 -0400 Subject: SUNRPC: Transport fault injection It has been exceptionally useful to exercise the logic that handles local immediate errors and RDMA connection loss. To enable developers to test this regularly and repeatably, add logic to simulate connection loss every so often. Fault injection is disabled by default. It is enabled with $ sudo echo xxx > /sys/kernel/debug/sunrpc/inject_fault/disconnect where "xxx" is a large positive number of transport method calls before a disconnect. A value of several thousand is usually a good number that allows reasonable forward progress while still causing a lot of connection drops. These hooks are disabled when SUNRPC_DEBUG is turned off. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b6096592b1d4..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -135,6 +135,7 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -244,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -445,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ -- cgit v1.2.3 From 7e53df111beea8db2543424d07bdee2a630698c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:04 -0400 Subject: xprtrdma: Remove rpcrdma_ia::ri_memreg_strategy Clean up: This field is no longer used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Reviewed-by: Devesh Sharma Tested-By: Devesh Sharma Reviewed-by: Doug Ledford Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -56,7 +56,8 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -/* memory registration strategies */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, -- cgit v1.2.3 From 764ad8ba8cd4c6f836fca9378f8c5121aece0842 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:56 -0400 Subject: nfs: increase size of EXCHANGE_ID name string buffer The current buffer is much too small if you have a relatively long hostname. Bring it up to the size of the one that SETCLIENTID has. Cc: Reported-by: Michael Skralivetsky Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..e9e9a8dcfb47 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1142,7 +1142,7 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (48) +#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; -- cgit v1.2.3 From 3a6bb738792500e8b4534c0350c13a132bac0492 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:57 -0400 Subject: nfs: convert setclientid and exchange_id encoders to use clp->cl_owner_id ...instead of buffers that are part of their arg structs. We already hold a reference to the client, so we might as well use the allocated buffer. In the event that we can't allocate the clp->cl_owner_id, then just return -ENOMEM. Note too that we switch from a GFP_KERNEL allocation here to GFP_NOFS. It's possible we could end up trying to do a SETCLIENTID or EXCHANGE_ID in order to reclaim some memory, and the GFP_KERNEL allocations in the existing code could cause recursion back into NFS reclaim. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e9e9a8dcfb47..6c0b423d781b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -994,7 +994,7 @@ struct nfs4_setclientid { char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; - u32 sc_cb_ident; + struct nfs_client *sc_clnt; struct rpc_cred *sc_cred; }; -- cgit v1.2.3 From 873e385116b2cc5c7daca8f51881371fadb90970 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:44:00 -0400 Subject: nfs: make nfs4_init_uniform_client_string use a dynamically allocated buffer Change the uniform client string generator to dynamically allocate the NFSv4 client name string buffer. With this patch, we can eliminate the buffers that are embedded within the "args" structs and simply use the name string that is hanging off the client. This uniform string case is a little simpler than the nonuniform since we don't need to deal with RCU, but we do have two different cases, depending on whether there is a uniquifier or not. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6c0b423d781b..c959e7eb5bd4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -984,11 +984,8 @@ struct nfs4_readlink_res { struct nfs4_sequence_res seq_res; }; -#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; - unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; char sc_netid[RPCBIND_MAXNETIDLEN + 1]; @@ -1142,12 +1139,9 @@ struct nfs41_state_protection { struct nfs4_op_map allow; }; -#define NFS4_EXCHANGE_ID_LEN (127) struct nfs41_exchange_id_args { struct nfs_client *client; nfs4_verifier *verifier; - unsigned int id_len; - char id[NFS4_EXCHANGE_ID_LEN]; u32 flags; struct nfs41_state_protection state_protect; }; -- cgit v1.2.3 From fec47d863587c272d6fbf4e50066209c953d5e60 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:27 -0500 Subject: remoteproc: introduce rproc_get_by_phandle API Allow users of remoteproc the ability to get a handle to an rproc by passing a phandle supplied in the user's device tree node. This is useful in situations that require manual booting of the rproc. This patch uses the code removed by commit 40e575b1d0b3 ("remoteproc: remove the get_by_name/put API") for the ref counting but is modified to use a simple list and locking mechanism and has rproc_get_by_name replaced with an rproc_get_by_phandle API. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna [fix order of Signed-off-by tags] Signed-off-by: Ohad Ben-Cohen --- include/linux/remoteproc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 78b8a9b9d40a..56739e5df1e6 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -36,11 +36,11 @@ #define REMOTEPROC_H #include -#include #include #include #include #include +#include /** * struct resource_table - firmware resource table header @@ -375,7 +375,7 @@ enum rproc_crash_type { /** * struct rproc - represents a physical remote processor device - * @node: klist node of this rproc object + * @node: list node of this rproc object * @domain: iommu domain * @name: human readable name of the rproc * @firmware: name of firmware file to be loaded @@ -407,7 +407,7 @@ enum rproc_crash_type { * @has_iommu: flag to indicate if remote processor is behind an MMU */ struct rproc { - struct klist_node node; + struct list_head node; struct iommu_domain *domain; const char *name; const char *firmware; @@ -481,6 +481,7 @@ struct rproc_vdev { u32 rsc_offset; }; +struct rproc *rproc_get_by_phandle(phandle phandle); struct rproc *rproc_alloc(struct device *dev, const char *name, const struct rproc_ops *ops, const char *firmware, int len); -- cgit v1.2.3 From a01f7cd657c95941079d548ef7fbf0cc370c1259 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 22 May 2015 15:45:28 -0500 Subject: remoteproc: add a rproc ops for performing address translation The rproc_da_to_va API is currently used to perform any device to kernel address translations to meet the different needs of the remoteproc core/drivers (eg: loading). The functionality is achieved within the remoteproc core, and is limited only for carveouts allocated within the core. A new rproc ops, da_to_va, is added to provide flexibility to platform implementations to perform the address translation themselves when the above conditions cannot be met by the implementations. The rproc_da_to_va() API is extended to invoke this ops if present, and fallback to regular processing if the platform implementation cannot provide the translation. This will allow any remoteproc implementations to translate addresses for dedicated memories like internal memories. While at this, also update the rproc_da_to_va() documentation since it is an exported function. Signed-off-by: Suman Anna Signed-off-by: Dave Gerlach Signed-off-by: Ohad Ben-Cohen --- include/linux/remoteproc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 56739e5df1e6..9c4e1384f636 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -330,11 +330,13 @@ struct rproc; * @start: power on the device and boot it * @stop: power off the device * @kick: kick a virtqueue (virtqueue id given as a parameter) + * @da_to_va: optional platform hook to perform address translations */ struct rproc_ops { int (*start)(struct rproc *rproc); int (*stop)(struct rproc *rproc); void (*kick)(struct rproc *rproc, int vqid); + void * (*da_to_va)(struct rproc *rproc, u64 da, int len); }; /** -- cgit v1.2.3 From a01bc0d5f557bd8becd0ba75d09c39192004697e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 22 May 2015 15:45:30 -0500 Subject: remoteproc/wkup_m3: add a remoteproc driver for TI Wakeup M3 Add a remoteproc driver to load the firmware and boot a small Wakeup M3 processor present on TI AM33xx and AM43xx SoCs. This Wakeup M3 remote processor is an integrated Cortex M3 that allows the SoC to enter the lowest possible power state by taking control from the MPU after it has gone into its own low power state and shutting off any additional peripherals. The Wakeup M3 processor has two internal memory regions - 16 kB of unified instruction memory called UMEM used to store executable code, and 8 kB of data memory called DMEM used for all data sections. The Wakeup M3 processor executes its code entirely from within the UMEM and uses the DMEM for any data. It does not use any external memory or any other external resources. The device address view has the UMEM at address 0x0 and DMEM at address 0x80000, and these are computed automatically within the driver based on relative address calculation from the corresponding device tree IOMEM resources. These device addresses are used to aid the core remoteproc ELF loader code to properly translate and load the firmware segments through the .rproc_da_to_va ops. Signed-off-by: Dave Gerlach Signed-off-by: Suman Anna Signed-off-by: Ohad Ben-Cohen --- include/linux/platform_data/wkup_m3.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 include/linux/platform_data/wkup_m3.h (limited to 'include/linux') diff --git a/include/linux/platform_data/wkup_m3.h b/include/linux/platform_data/wkup_m3.h new file mode 100644 index 000000000000..3f1d77effd71 --- /dev/null +++ b/include/linux/platform_data/wkup_m3.h @@ -0,0 +1,30 @@ +/* + * TI Wakeup M3 remote processor platform data + * + * Copyright (C) 2014-2015 Texas Instruments, Inc. + * + * Dave Gerlach + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_PLATFORM_DATA_WKUP_M3_H +#define _LINUX_PLATFORM_DATA_WKUP_M3_H + +struct platform_device; + +struct wkup_m3_platform_data { + const char *reset_name; + + int (*assert_reset)(struct platform_device *pdev, const char *name); + int (*deassert_reset)(struct platform_device *pdev, const char *name); +}; + +#endif /* _LINUX_PLATFORM_DATA_WKUP_M3_H */ -- cgit v1.2.3 From 4bacc9c9234c7c8eec44f5ed4e960d9f96fa0f01 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 18 Jun 2015 14:32:31 +0100 Subject: overlayfs: Make f_path always point to the overlay and f_inode to the underlay Make file->f_path always point to the overlay dentry so that the path in /proc/pid/fd is correct and to ensure that label-based LSMs have access to the overlay as well as the underlay (path-based LSMs probably don't need it). Using my union testsuite to set things up, before the patch I see: [root@andromeda union-testsuite]# bash 5 /a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 13381 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 13381 Links: 1 ... After the patch: [root@andromeda union-testsuite]# bash 5 /mnt/a/foo107 [root@andromeda union-testsuite]# stat /mnt/a/foo107 ... Device: 23h/35d Inode: 40346 Links: 1 ... [root@andromeda union-testsuite]# stat -L /proc/$$/fd/5 ... Device: 23h/35d Inode: 40346 Links: 1 ... Note the change in where /proc/$$/fd/5 points to in the ls command. It was pointing to /a/foo107 (which doesn't exist) and now points to /mnt/a/foo107 (which is correct). The inode accessed, however, is the lower layer. The union layer is on device 25h/37d and the upper layer on 24h/36d. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 2 ++ include/linux/fs.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index df334cbacc6d..167ec0934049 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -160,6 +160,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); + struct inode *(*d_select_inode)(struct dentry *, unsigned); } ____cacheline_aligned; /* @@ -225,6 +226,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ +#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ extern seqlock_t rename_lock; diff --git a/include/linux/fs.h b/include/linux/fs.h index b577e801b4af..2bd77e10e8e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,7 +1641,6 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); /* WARNING: probably going away soon, do not use! */ - int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -2194,7 +2193,6 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3 From 9bf39ab2adafd7cf8740859cb49e7b7952813a5d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:29:13 +0200 Subject: vfs: add file_path() helper Turn d_path(&file->f_path, ...); into file_path(file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2bd77e10e8e5..2c135ad741a9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2500,6 +2500,8 @@ extern struct file * open_exec(const char *); extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); +extern char *file_path(struct file *, char *, int); + #include /* needed for stackable file system support */ -- cgit v1.2.3 From 2726d56620ce71f40dd583d51391b86e1ab8cc57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 Jun 2015 10:30:28 +0200 Subject: vfs: add seq_file_path() helper Turn seq_path(..., &file->f_path, ...); into seq_file_path(..., file, ...); Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index afbb1fd77c77..912a7c482649 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -123,6 +123,7 @@ __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); +int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); -- cgit v1.2.3 From 5fa8e0a1c6a762857ae67d1628c58b9a02362003 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:53 +0200 Subject: fs: Rename file_remove_suid() to file_remove_privs() file_remove_suid() is a misnomer since it removes also file capabilities stored in xattrs and sets S_NOSEC flag. Also should_remove_suid() tells something else than whether file_remove_suid() call is necessary which leads to bugs. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c135ad741a9..641e68d850cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2553,7 +2553,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); -extern int file_remove_suid(struct file *); +extern int file_remove_privs(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From dbfae0cdcd87602737101d4417811f4323156b54 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:54 +0200 Subject: fs: Provide function telling whether file_remove_privs() will do anything Provide function telling whether file_remove_privs() will do anything. Currently we only have should_remove_suid() and that does something slightly different. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 641e68d850cf..ee60e8ab210f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,6 +2554,7 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); +extern int file_needs_remove_privs(struct file *file); extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From 45f147a1bc97c743c6101a8d2741c69a51f583e4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 May 2015 16:05:55 +0200 Subject: fs: Call security_ops->inode_killpriv on truncate Comment in include/linux/security.h says that ->inode_killpriv() should be called when setuid bit is being removed and that similar security labels (in fact this applies only to file capabilities) should be removed at this time as well. However we don't call ->inode_killpriv() when we remove suid bit on truncate. We fix the problem by calling ->inode_need_killpriv() and subsequently ->inode_killpriv() on truncate the same way as we do it on file write. After this patch there's only one user of should_remove_suid() - ocfs2 - and indeed it's buggy because it doesn't call ->inode_killpriv() on write. However fixing it is difficult because of special locking constraints. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ee60e8ab210f..1e658b11c265 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2554,7 +2554,11 @@ extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int should_remove_suid(struct dentry *); extern int file_remove_privs(struct file *); -extern int file_needs_remove_privs(struct file *file); +extern int dentry_needs_remove_privs(struct dentry *dentry); +static inline int file_needs_remove_privs(struct file *file) +{ + return dentry_needs_remove_privs(file->f_path.dentry); +} extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) -- cgit v1.2.3 From b57c2cb9ea1a02c2ae08e16de8c20cc13ffbf85a Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 24 May 2015 17:19:41 +0200 Subject: pagemap.h: move dir_pages() over there That function was declared in a lot of filesystems to calculate directory pages. Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- include/linux/pagemap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b3736f7065c..808942d31062 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -670,4 +670,10 @@ static inline int add_to_page_cache(struct page *page, return error; } +static inline unsigned long dir_pages(struct inode *inode) +{ + return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; +} + #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From dc3f4198eac14e52a98dfc79cd84b45e280f59cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 May 2015 10:10:34 -0400 Subject: make simple_positive() public Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 167ec0934049..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -507,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } +static inline int simple_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) -- cgit v1.2.3 From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -500,6 +500,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_LAYOUTSTATS, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c959e7eb5bd4..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -316,6 +316,49 @@ struct nfs4_layoutreturn { int rpc_status; }; +#define PNFS_LAYOUTSTATS_MAXSIZE 256 + +struct nfs42_layoutstat_args; +struct nfs42_layoutstat_devinfo; +typedef void (*layoutstats_encode_t)(struct xdr_stream *, + struct nfs42_layoutstat_args *, + struct nfs42_layoutstat_devinfo *); + +/* Per file per deviceid layoutstats */ +struct nfs42_layoutstat_devinfo { + struct nfs4_deviceid dev_id; + __u64 offset; + __u64 length; + __u64 read_count; + __u64 read_bytes; + __u64 write_count; + __u64 write_bytes; + __u32 layout_type; + layoutstats_encode_t layoutstats_encode; + void *layout_private; +}; + +struct nfs42_layoutstat_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *fh; + struct inode *inode; + nfs4_stateid stateid; + int num_dev; + struct nfs42_layoutstat_devinfo *devinfo; +}; + +struct nfs42_layoutstat_res { + struct nfs4_sequence_res seq_res; + int num_dev; + int rpc_status; +}; + +struct nfs42_layoutstat_data { + struct inode *inode; + struct nfs42_layoutstat_args args; + struct nfs42_layoutstat_res res; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -219,6 +219,7 @@ struct nfs_inode { #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ +#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From 144cba1493fdd6e3e1980e439a31df877831ebcd Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 27 Apr 2015 11:09:54 +0800 Subject: libceph: allow setting osd_req_op's flags Signed-off-by: Yan, Zheng Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61b19c46bdb3..7506b485bb6d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -249,7 +249,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void osd_req_op_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode); + unsigned int which, u16 opcode, u32 flags); extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, unsigned int which, -- cgit v1.2.3 From d50c97b566c5bbf990eff472e9feaa58fdebdd33 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 11:52:20 +0300 Subject: libceph: nuke time_sub() Unused since ceph got merged into mainline I guess. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 30f92cefaa72..85ae9a889a3f 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -93,15 +93,6 @@ enum { CEPH_MOUNT_SHUTDOWN, }; -/* - * subtract jiffies - */ -static inline unsigned long time_sub(unsigned long a, unsigned long b) -{ - BUG_ON(time_after(b, a)); - return (long)a - (long)b; -} - struct ceph_mds_client; /* -- cgit v1.2.3 From a319bf56a617354e62cf5f774d2ca4e1a8a3bff3 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 May 2015 12:02:17 +0300 Subject: libceph: store timeouts in jiffies, verify user input There are currently three libceph-level timeouts that the user can specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of these are in seconds and no checking is done on user input: negative values are accepted, we multiply them all by HZ which may or may not overflow, arbitrarily large jiffies then get added together, etc. There is also a bug in the way mount_timeout=0 is handled. It's supposed to mean "infinite timeout", but that's not how wait.h APIs treat it and so __ceph_open_session() for example will busy loop without much chance of being interrupted if none of ceph-mons are there. Fix all this by verifying user input, storing timeouts capped by msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies() helper for all user-specified waits to handle infinite timeouts correctly. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/libceph.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 85ae9a889a3f..d73a569f9bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,9 +43,9 @@ struct ceph_options { int flags; struct ceph_fsid fsid; struct ceph_entity_addr my_addr; - int mount_timeout; - int osd_idle_ttl; - int osd_keepalive_timeout; + unsigned long mount_timeout; /* jiffies */ + unsigned long osd_idle_ttl; /* jiffies */ + unsigned long osd_keepalive_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -63,9 +63,9 @@ struct ceph_options { /* * defaults */ -#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_KEEPALIVE_DEFAULT 5 -#define CEPH_OSD_IDLE_TTL_DEFAULT 60 +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) @@ -93,6 +93,11 @@ enum { CEPH_MOUNT_SHUTDOWN, }; +static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) +{ + return timeout ?: MAX_SCHEDULE_TIMEOUT; +} + struct ceph_mds_client; /* -- cgit v1.2.3 From f66fd9f0952187d274c13c136b74548f792c1925 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 10 Jun 2015 17:26:13 +0800 Subject: ceph: pre-allocate data structure that tracks caps flushing Signed-off-by: Yan, Zheng --- include/linux/ceph/libceph.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index d73a569f9bf5..9ebee53d3bf5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -174,6 +174,7 @@ static inline int calc_pages_for(u64 off, u64 len) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; -- cgit v1.2.3 From b459be739f97e2062b2ba77cfe8ea198dbd58904 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 12 Jun 2015 13:21:07 +0300 Subject: crush: sync up with userspace .. up to ceph.git commit 1db1abc8328d ("crush: eliminate ad hoc diff between kernel and userspace"). This fixes a bunch of recently pulled coding style issues and makes includes a bit cleaner. A patch "crush:Make the function crush_ln static" from Nicholas Krause is folded in as crush_ln() has been made static in userspace as well. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 40 ++++++++++++++++++++++++++++++++++++++-- include/linux/crush/hash.h | 6 ++++++ include/linux/crush/mapper.h | 2 +- 3 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 48a1a7d100f1..48b49305716b 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -1,7 +1,11 @@ #ifndef CEPH_CRUSH_CRUSH_H #define CEPH_CRUSH_CRUSH_H -#include +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif /* * CRUSH is a pseudo-random data distribution algorithm that @@ -20,7 +24,11 @@ #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ +#define CRUSH_MAX_RULESET (1<<8) /* max crush ruleset number */ +#define CRUSH_MAX_RULES CRUSH_MAX_RULESET /* should be the same as max rulesets */ +#define CRUSH_MAX_DEVICE_WEIGHT (100u * 0x10000u) +#define CRUSH_MAX_BUCKET_WEIGHT (65535u * 0x10000u) #define CRUSH_ITEM_UNDEF 0x7ffffffe /* undefined result (internal use only) */ #define CRUSH_ITEM_NONE 0x7fffffff /* no result */ @@ -108,6 +116,15 @@ enum { }; extern const char *crush_bucket_alg_name(int alg); +/* + * although tree was a legacy algorithm, it has been buggy, so + * exclude it. + */ +#define CRUSH_LEGACY_ALLOWED_BUCKET_ALGS ( \ + (1 << CRUSH_BUCKET_UNIFORM) | \ + (1 << CRUSH_BUCKET_LIST) | \ + (1 << CRUSH_BUCKET_STRAW)) + struct crush_bucket { __s32 id; /* this'll be negative */ __u16 type; /* non-zero; type=0 is reserved for devices */ @@ -174,7 +191,7 @@ struct crush_map { /* choose local attempts using a fallback permutation before * re-descent */ __u32 choose_local_fallback_tries; - /* choose attempts before giving up */ + /* choose attempts before giving up */ __u32 choose_total_tries; /* attempt chooseleaf inner descent once for firstn mode; on * reject retry outer descent. Note that this does *not* @@ -187,6 +204,25 @@ struct crush_map { * that want to limit reshuffling, a value of 3 or 4 will make the * mappings line up a bit better with previous mappings. */ __u8 chooseleaf_vary_r; + +#ifndef __KERNEL__ + /* + * version 0 (original) of straw_calc has various flaws. version 1 + * fixes a few of them. + */ + __u8 straw_calc_version; + + /* + * allowed bucket algs is a bitmask, here the bit positions + * are CRUSH_BUCKET_*. note that these are *bits* and + * CRUSH_BUCKET_* values are not, so we need to or together (1 + * << CRUSH_BUCKET_WHATEVER). The 0th bit is not used to + * minimize confusion (bucket type values start at 1). + */ + __u32 allowed_bucket_algs; + + __u32 *choose_tries; +#endif }; diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h index 91e884230d5d..d1d90258242e 100644 --- a/include/linux/crush/hash.h +++ b/include/linux/crush/hash.h @@ -1,6 +1,12 @@ #ifndef CEPH_CRUSH_HASH_H #define CEPH_CRUSH_HASH_H +#ifdef __KERNEL__ +# include +#else +# include "crush_compat.h" +#endif + #define CRUSH_HASH_RJENKINS1 0 #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index eab367446eea..5dfd5b1125d2 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -8,7 +8,7 @@ * LGPL2 */ -#include +#include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); extern int crush_do_rule(const struct crush_map *map, -- cgit v1.2.3 From 7c494375b773497228502133879072a9e959c063 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 1 Jun 2015 10:35:16 -0700 Subject: Input: improve parsing OF parameters for touchscreens When applying touchscreen parameters specified in device tree let's make sure we keep whatever setup was done by the driver and not reset the missing values to zero. Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Dmitry Torokhov --- include/linux/input/touchscreen.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h index 08a5ef6e8f25..eecc9ea6cd58 100644 --- a/include/linux/input/touchscreen.h +++ b/include/linux/input/touchscreen.h @@ -12,9 +12,10 @@ #include #ifdef CONFIG_OF -void touchscreen_parse_of_params(struct input_dev *dev); +void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch); #else -static inline void touchscreen_parse_of_params(struct input_dev *dev) +static inline void touchscreen_parse_of_params(struct input_dev *dev, + bool multitouch) { } #endif -- cgit v1.2.3 From 6c5a0d891543873aefc3aaf846c1e7afe0982ff9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:45:46 -0400 Subject: NFSv4.2: LAYOUTSTATS is optional to implement Make it so, by checking the return value for NFS4ERR_MOTSUPP and caching the information as a server capability. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -237,5 +237,6 @@ struct nfs_server { #define NFS_CAP_SEEK (1U << 19) #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) +#define NFS_CAP_LAYOUTSTATS (1U << 22) #endif -- cgit v1.2.3 From 31f02455455d405320e2f749696bef4e02903b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 12:07:17 -0400 Subject: sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..d8fbd500330e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -- cgit v1.2.3 From 8a81252b774b53e628a8a0fe18e2b8fc236d92cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jun 2015 15:54:08 +0200 Subject: fs/file.c: don't acquire files->file_lock in fd_install() Mateusz Guzik reported : Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark : http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait that ongoing install are finished (synchronize_sched()) resize should be attempted by a single thread to not waste resources. rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet Reported-by: Mateusz Guzik Acked-by: Mateusz Guzik Tested-by: Mateusz Guzik Signed-off-by: Al Viro --- include/linux/fdtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 230f87bdf5ad..fbb88740634a 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -47,6 +47,9 @@ struct files_struct { * read mostly part */ atomic_t count; + bool resize_in_progress; + wait_queue_head_t resize_wait; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* -- cgit v1.2.3 From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 15:54:49 -0500 Subject: fs: Add helper functions for permanently empty directories. To ensure it is safe to mount proc and sysfs I need to check if filesystems that are mounted on top of them are mounted on truly empty directories. Given that some directories can gain entries over time, knowing that a directory is empty right now is insufficient. Therefore add supporting infrastructure for permantently empty directories that proc and sysfs can use when they create mount points for filesystems and fs_fully_visible can use to test for permanently empty directories to ensure that nothing will be gained by mounting a fresh copy of proc or sysfs. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2d24eeb8e59c..571aab91bfc0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; +extern void make_empty_dir_inode(struct inode *inode); +extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); -- cgit v1.2.3 From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 9 May 2015 22:09:14 -0500 Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints. Add a magic sysctl table sysctl_mount_point that when used to create a directory forces that directory to be permanently empty. Update the code to use make_empty_dir_inode when accessing permanently empty directories. Update the code to not allow adding to permanently empty directories. Update /proc/sys/fs/binfmt_misc to be a permanently empty directory. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 795d5fea5697..fa7bc29925c9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); + +extern struct ctl_table sysctl_mount_point[]; + #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { -- cgit v1.2.3 From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:09:29 -0500 Subject: kernfs: Add support for always empty directories. Add a new function kernfs_create_empty_dir that can be used to create directory that can not be modified. Update the code to use make_empty_dir_inode when reporting a permanently empty directory to the vfs. Update the code to not allow adding to permanently empty directories. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- include/linux/kernfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 71ecdab1671b..29d1896c3ba5 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -45,6 +45,7 @@ enum kernfs_node_flag { KERNFS_LOCKDEP = 0x0100, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, + KERNFS_EMPTY_DIR = 0x1000, }; /* @flags for kernfs_create_root() */ @@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns); +struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, + const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, -- cgit v1.2.3 From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 May 2015 16:31:40 -0500 Subject: sysfs: Add support for permanently empty directories to serve as mount points. Add two functions sysfs_create_mount_point and sysfs_remove_mount_point that hang a permanently empty directory off of a kobject or remove a permanently emptpy directory hanging from a kobject. Export these new functions so modular filesystems can use them. Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Signed-off-by: "Eric W. Biederman" --- include/linux/sysfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99382c0df17e..9f65758311a4 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, int __must_check sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, const void *new_ns); +int __must_check sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name); +void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name); int __must_check sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, @@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, return 0; } +static inline int sysfs_create_mount_point(struct kobject *parent_kobj, + const char *name) +{ + return 0; +} + +static inline void sysfs_remove_mount_point(struct kobject *parent_kobj, + const char *name) +{ +} + static inline int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) -- cgit v1.2.3 From bd7ade3cd9b0850264306f5c2b79024a417b6396 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: bufferhead: Add _gfp version for sb_getblk() sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { -- cgit v1.2.3 From ec110bc7cc48d7806c9b65094e6afb19452d458f Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 7 May 2015 06:45:21 -0400 Subject: NTB: Move files in preparation for NTB abstraction This patch only moves files to their new locations, before applying the next two patches adding the NTB Abstraction layer. Splitting this patch from the next is intended make distinct which code is changed only due to moving the files, versus which are substantial code changes in adding the NTB Abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 88 ------------------------------------------- include/linux/ntb_transport.h | 88 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 88 deletions(-) delete mode 100644 include/linux/ntb.h create mode 100644 include/linux/ntb_transport.h (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h deleted file mode 100644 index 9ac1a62fc6f5..000000000000 --- a/include/linux/ntb.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * BSD LICENSE - * - * Copyright(c) 2012 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copy - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Intel PCIe NTB Linux driver - * - * Contact Information: - * Jon Mason - */ - -struct ntb_transport_qp; - -struct ntb_client { - struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); -}; - -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -int ntb_register_client(struct ntb_client *drvr); -void ntb_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); - -struct ntb_queue_handlers { - void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler)(void *data, int status); -}; - -unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); -unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); -struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, - const struct ntb_queue_handlers *handlers); -void ntb_transport_free_queue(struct ntb_transport_qp *qp); -int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, - unsigned int len); -void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); -void ntb_transport_link_up(struct ntb_transport_qp *qp); -void ntb_transport_link_down(struct ntb_transport_qp *qp); -bool ntb_transport_link_query(struct ntb_transport_qp *qp); diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h new file mode 100644 index 000000000000..f78b64cc09d5 --- /dev/null +++ b/include/linux/ntb_transport.h @@ -0,0 +1,88 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * BSD LICENSE + * + * Copyright(c) 2012 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel PCIe NTB Linux driver + * + * Contact Information: + * Jon Mason + */ + +struct ntb_transport_qp; + +struct ntb_client { + struct device_driver driver; + int (*probe)(struct pci_dev *pdev); + void (*remove)(struct pci_dev *pdev); +}; + +enum { + NTB_LINK_DOWN = 0, + NTB_LINK_UP, +}; + +int ntb_transport_register_client(struct ntb_client *drvr); +void ntb_transport_unregister_client(struct ntb_client *drvr); +int ntb_register_client_dev(char *device_name); +void ntb_unregister_client_dev(char *device_name); + +struct ntb_queue_handlers { + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, int status); +}; + +unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); +unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); +struct ntb_transport_qp * +ntb_transport_create_queue(void *data, struct pci_dev *pdev, + const struct ntb_queue_handlers *handlers); +void ntb_transport_free_queue(struct ntb_transport_qp *qp); +int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, + unsigned int len); +void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len); +void ntb_transport_link_up(struct ntb_transport_qp *qp); +void ntb_transport_link_down(struct ntb_transport_qp *qp); +bool ntb_transport_link_query(struct ntb_transport_qp *qp); -- cgit v1.2.3 From a13f35e8714009145e32ebe2bf25b84e1376e314 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jul 2015 08:44:34 -0600 Subject: writeback: don't embed root bdi_writeback_congested in bdi_writeback 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") made bdi (backing_dev_info) host per-cgroup wb's (bdi_writeback's). As the congested state needs to be per-wb and referenced from blkcg side and multiple wbs, the patch made all non-root cong's (bdi_writeback_congested's) reference counted and indexed on bdi. When a bdi is destroyed, cgwb_bdi_destroy() tries to drain all non-root cong's; however, this can hang indefinitely because wb's can also be referenced from blkcg_gq's which are destroyed after bdi destruction is complete. To fix the bug, bdi destruction will be updated to not wait for cong's to drain, which naturally means that cong's may outlive the associated bdi. This is fine for non-root cong's but is problematic for the root cong's which are embedded in their bdi's as they may end up getting dereferenced after the containing bdi's are freed. This patch makes root cong's behave the same as non-root cong's. They are no longer embedded in their bdi's but allocated separately during bdi initialization, indexed and reference counted the same way. * As cong handling is the same for all wb's, wb->congested initialization is moved into wb_init(). * When !CONFIG_CGROUP_WRITEBACK, there was no indexing or refcnting. bdi->wb_congested is now a pointer pointing to the root cong allocated during bdi init and minimal refcnting operations are implemented. * The above makes root wb init paths diverge depending on CONFIG_CGROUP_WRITEBACK. root wb init is moved to cgwb_bdi_init(). This patch in itself shouldn't cause any consequential behavior differences but prepares for the actual fix. Signed-off-by: Tejun Heo Reported-by: Jon Christopherson Link: https://bugzilla.kernel.org/show_bug.cgi?id=100681 Tested-by: Jon Christopherson Added include to backing-dev.h for kfree() definition. Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 5 +++-- include/linux/backing-dev.h | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a48d90e3bcbb..a23209b43842 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -50,10 +50,10 @@ enum wb_stat_item { */ struct bdi_writeback_congested { unsigned long state; /* WB_[a]sync_congested flags */ + atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK struct backing_dev_info *bdi; /* the associated bdi */ - atomic_t refcnt; /* nr of attached wb's and blkg */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif @@ -150,11 +150,12 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ - struct bdi_writeback_congested wb_congested; /* its congested state */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ +#else + struct bdi_writeback_congested *wb_congested; #endif wait_queue_head_t wb_waitq; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0e6d4828a77a..0fe9df983ab7 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -15,6 +15,7 @@ #include #include #include +#include int __must_check bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); @@ -465,11 +466,14 @@ static inline bool inode_cgwb_enabled(struct inode *inode) static inline struct bdi_writeback_congested * wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp) { - return bdi->wb.congested; + atomic_inc(&bdi->wb_congested->refcnt); + return bdi->wb_congested; } static inline void wb_congested_put(struct bdi_writeback_congested *congested) { + if (atomic_dec_and_test(&congested->refcnt)) + kfree(congested); } static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi) -- cgit v1.2.3 From 91e20b5040c67c51aad88cf87db4305c5bd7f79d Mon Sep 17 00:00:00 2001 From: Joel Porquet Date: Thu, 2 Jul 2015 15:32:00 -0400 Subject: irqchip: Move IRQCHIP_DECLARE macro to include/linux/irqchip.h At the moment the IRQCHIP_DECLARE macro is only declared locally in drivers/irqchip/irqchip.h. It prevents from using it directly in arch/* directories whenever irqchip drivers only exist there, which happens in a few cases (e.g. arc, arm, microblaze and mips). This patch makes the macro to be globally defined, i.e. in include/linux/irqchip.h, and thus usable for arch-specific declarations of irqchip drivers. In this way, it is very similar to what clocksource does (ie CLOCKSOURCE_OF_DECLARE is defined in include/linux/clocksource.h). For now, this patch only moves the declaration of the macro IRQCHIP_DECLARE to the global header 'include/linux/irqchip.h' and make 'drivers/irqchip/irqchip.h' include 'include/linux/irqchip.h'. Later, other patches will get rid of 'drivers/irqchip/irqchip.h' and modify all the impacted irqchip drivers. Signed-off-by: Joel Porquet Cc: Jason Cooper Link: http://lkml.kernel.org/r/1435865565-14114-1-git-send-email-joel@porquet.org Signed-off-by: Thomas Gleixner --- include/linux/irqchip.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 14d79131f53d..638887376e58 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,20 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include + +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their DT compatible string and their + * initialization function. + * + * @name: name that must be unique accross all IRQCHIP_DECLARE of the + * same file. + * @compstr: compatible string of the irqchip driver + * @fn: initialization function + */ +#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else -- cgit v1.2.3 From 2ecd9d29abb171d6e97a4f3eb29d7456a11401b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Jul 2015 18:53:58 +0200 Subject: sched, preempt_notifier: separate notifier registration from static_key inc/dec Commit 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") had two problems. First, the preempt-notifier API needs to sleep with the addition of the static_key, we do however need to hold off preemption while modifying the preempt notifier list, otherwise a preemption could observe an inconsistent list state. KVM correctly registers and unregisters preempt notifiers with preemption disabled, so the sleep caused dmesg splats. Second, KVM registers and unregisters preemption notifiers very often (in vcpu_load/vcpu_put). With a single uniprocessor guest the static key would move between 0 and 1 continuously, hitting the slow path on every userspace exit. To fix this, wrap the static_key inc/dec in a new API, and call it from KVM. Fixes: 1cde2930e154 ("sched/preempt: Add static_key() to preempt_notifiers") Reported-by: Pontus Fuchs Reported-by: Takashi Iwai Tested-by: Takashi Iwai Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- include/linux/preempt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 0f1534acaf60..84991f185173 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -293,6 +293,8 @@ struct preempt_notifier { struct preempt_ops *ops; }; +void preempt_notifier_inc(void); +void preempt_notifier_dec(void); void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); -- cgit v1.2.3 From f6db8347993256b58bd4746b0c4c5b935c32210d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 25 Jun 2015 23:53:37 +0530 Subject: sched/stat: Simplify the sched_info accounting dependency Both CONFIG_SCHEDSTATS=y and CONFIG_TASK_DELAY_ACCT=y track task sched_info, which results in ugly #if clauses. Simplify the code by introducing a synthethic CONFIG_SCHED_INFO switch, selected by both. Signed-off-by: Naveen N. Rao Cc: Balbir Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: ricklind@us.ibm.com Link: http://lkml.kernel.org/r/8d19eef800811a94b0f91bcbeb27430a884d7433.1435255405.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83e608a..9bf4bc0e3b8b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -849,7 +849,7 @@ extern struct user_struct root_user; struct backing_dev_info; struct reclaim_state; -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ @@ -859,7 +859,7 @@ struct sched_info { unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ }; -#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ +#endif /* CONFIG_SCHED_INFO */ #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { @@ -1408,7 +1408,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) +#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; #endif -- cgit v1.2.3 From 6b55c9654fccf69ae7ace23ca101dc37b903181b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 25 Jun 2015 22:51:41 +0530 Subject: sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h Currently print_cfs_rq() is declared in include/linux/sched.h. However it's not used outside kernel/sched. Hence move the declaration to kernel/sched/sched.h Also some functions are only available for CONFIG_SCHED_DEBUG=y. Hence move the declarations to within the #ifdef. Signed-off-by: Srikar Dronamraju Acked-by: Rik van Riel Cc: Iulia Manda Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435252903-1081-2-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9bf4bc0e3b8b..3505352dd296 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -191,8 +191,6 @@ struct task_group; #ifdef CONFIG_SCHED_DEBUG extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #endif /* -- cgit v1.2.3 From a1bd3baeb2f18b2b3d0f98ce5fdaa725149b950b Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: NTB: Add NTB hardware abstraction layer Abstract the NTB device behind a programming interface, so that it can support different hardware and client drivers. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 984 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 984 insertions(+) create mode 100644 include/linux/ntb.h (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h new file mode 100644 index 000000000000..b02f72bb8e32 --- /dev/null +++ b/include/linux/ntb.h @@ -0,0 +1,984 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copy + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * PCIe NTB Linux driver + * + * Contact Information: + * Allen Hubbe + */ + +#ifndef _NTB_H_ +#define _NTB_H_ + +#include +#include + +struct ntb_client; +struct ntb_dev; +struct pci_dev; + +/** + * enum ntb_topo - NTB connection topology + * @NTB_TOPO_NONE: Topology is unknown or invalid. + * @NTB_TOPO_PRI: On primary side of local ntb. + * @NTB_TOPO_SEC: On secondary side of remote ntb. + * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. + * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. + */ +enum ntb_topo { + NTB_TOPO_NONE = -1, + NTB_TOPO_PRI, + NTB_TOPO_SEC, + NTB_TOPO_B2B_USD, + NTB_TOPO_B2B_DSD, +}; + +static inline int ntb_topo_is_b2b(enum ntb_topo topo) +{ + switch ((int)topo) { + case NTB_TOPO_B2B_USD: + case NTB_TOPO_B2B_DSD: + return 1; + } + return 0; +} + +static inline char *ntb_topo_string(enum ntb_topo topo) +{ + switch (topo) { + case NTB_TOPO_NONE: return "NTB_TOPO_NONE"; + case NTB_TOPO_PRI: return "NTB_TOPO_PRI"; + case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; + case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; + case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + } + return "NTB_TOPO_INVALID"; +} + +/** + * enum ntb_speed - NTB link training speed + * @NTB_SPEED_AUTO: Request the max supported speed. + * @NTB_SPEED_NONE: Link is not trained to any speed. + * @NTB_SPEED_GEN1: Link is trained to gen1 speed. + * @NTB_SPEED_GEN2: Link is trained to gen2 speed. + * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + */ +enum ntb_speed { + NTB_SPEED_AUTO = -1, + NTB_SPEED_NONE = 0, + NTB_SPEED_GEN1 = 1, + NTB_SPEED_GEN2 = 2, + NTB_SPEED_GEN3 = 3, +}; + +/** + * enum ntb_width - NTB link training width + * @NTB_WIDTH_AUTO: Request the max supported width. + * @NTB_WIDTH_NONE: Link is not trained to any width. + * @NTB_WIDTH_1: Link is trained to 1 lane width. + * @NTB_WIDTH_2: Link is trained to 2 lane width. + * @NTB_WIDTH_4: Link is trained to 4 lane width. + * @NTB_WIDTH_8: Link is trained to 8 lane width. + * @NTB_WIDTH_12: Link is trained to 12 lane width. + * @NTB_WIDTH_16: Link is trained to 16 lane width. + * @NTB_WIDTH_32: Link is trained to 32 lane width. + */ +enum ntb_width { + NTB_WIDTH_AUTO = -1, + NTB_WIDTH_NONE = 0, + NTB_WIDTH_1 = 1, + NTB_WIDTH_2 = 2, + NTB_WIDTH_4 = 4, + NTB_WIDTH_8 = 8, + NTB_WIDTH_12 = 12, + NTB_WIDTH_16 = 16, + NTB_WIDTH_32 = 32, +}; + +/** + * struct ntb_client_ops - ntb client operations + * @probe: Notify client of a new device. + * @remove: Notify client to remove a device. + */ +struct ntb_client_ops { + int (*probe)(struct ntb_client *client, struct ntb_dev *ntb); + void (*remove)(struct ntb_client *client, struct ntb_dev *ntb); +}; + +static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->probe && + ops->remove && + 1; +} + +/** + * struct ntb_ctx_ops - ntb driver context operations + * @link_event: See ntb_link_event(). + * @db_event: See ntb_db_event(). + */ +struct ntb_ctx_ops { + void (*link_event)(void *ctx); + void (*db_event)(void *ctx, int db_vector); +}; + +static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) +{ + /* commented callbacks are not required: */ + return + /* ops->link_event && */ + /* ops->db_event && */ + 1; +} + +/** + * struct ntb_ctx_ops - ntb device operations + * @mw_count: See ntb_mw_count(). + * @mw_get_range: See ntb_mw_get_range(). + * @mw_set_trans: See ntb_mw_set_trans(). + * @mw_clear_trans: See ntb_mw_clear_trans(). + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). + * @db_is_unsafe: See ntb_db_is_unsafe(). + * @db_valid_mask: See ntb_db_valid_mask(). + * @db_vector_count: See ntb_db_vector_count(). + * @db_vector_mask: See ntb_db_vector_mask(). + * @db_read: See ntb_db_read(). + * @db_set: See ntb_db_set(). + * @db_clear: See ntb_db_clear(). + * @db_read_mask: See ntb_db_read_mask(). + * @db_set_mask: See ntb_db_set_mask(). + * @db_clear_mask: See ntb_db_clear_mask(). + * @peer_db_addr: See ntb_peer_db_addr(). + * @peer_db_read: See ntb_peer_db_read(). + * @peer_db_set: See ntb_peer_db_set(). + * @peer_db_clear: See ntb_peer_db_clear(). + * @peer_db_read_mask: See ntb_peer_db_read_mask(). + * @peer_db_set_mask: See ntb_peer_db_set_mask(). + * @peer_db_clear_mask: See ntb_peer_db_clear_mask(). + * @spad_is_unsafe: See ntb_spad_is_unsafe(). + * @spad_count: See ntb_spad_count(). + * @spad_read: See ntb_spad_read(). + * @spad_write: See ntb_spad_write(). + * @peer_spad_addr: See ntb_peer_spad_addr(). + * @peer_spad_read: See ntb_peer_spad_read(). + * @peer_spad_write: See ntb_peer_spad_write(). + */ +struct ntb_dev_ops { + int (*mw_count)(struct ntb_dev *ntb); + int (*mw_get_range)(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size); + int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size); + int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + + int (*db_is_unsafe)(struct ntb_dev *ntb); + u64 (*db_valid_mask)(struct ntb_dev *ntb); + int (*db_vector_count)(struct ntb_dev *ntb); + u64 (*db_vector_mask)(struct ntb_dev *ntb, int db_vector); + + u64 (*db_read)(struct ntb_dev *ntb); + int (*db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*db_read_mask)(struct ntb_dev *ntb); + int (*db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*peer_db_addr)(struct ntb_dev *ntb, + phys_addr_t *db_addr, resource_size_t *db_size); + u64 (*peer_db_read)(struct ntb_dev *ntb); + int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits); + + u64 (*peer_db_read_mask)(struct ntb_dev *ntb); + int (*peer_db_set_mask)(struct ntb_dev *ntb, u64 db_bits); + int (*peer_db_clear_mask)(struct ntb_dev *ntb, u64 db_bits); + + int (*spad_is_unsafe)(struct ntb_dev *ntb); + int (*spad_count)(struct ntb_dev *ntb); + + u32 (*spad_read)(struct ntb_dev *ntb, int idx); + int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + + int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); + int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); +}; + +static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) +{ + /* commented callbacks are not required: */ + return + ops->mw_count && + ops->mw_get_range && + ops->mw_set_trans && + /* ops->mw_clear_trans && */ + ops->link_is_up && + ops->link_enable && + ops->link_disable && + /* ops->db_is_unsafe && */ + ops->db_valid_mask && + + /* both set, or both unset */ + (!ops->db_vector_count == !ops->db_vector_mask) && + + ops->db_read && + /* ops->db_set && */ + ops->db_clear && + /* ops->db_read_mask && */ + ops->db_set_mask && + ops->db_clear_mask && + ops->peer_db_addr && + /* ops->peer_db_read && */ + ops->peer_db_set && + /* ops->peer_db_clear && */ + /* ops->peer_db_read_mask && */ + /* ops->peer_db_set_mask && */ + /* ops->peer_db_clear_mask && */ + /* ops->spad_is_unsafe && */ + ops->spad_count && + ops->spad_read && + ops->spad_write && + ops->peer_spad_addr && + /* ops->peer_spad_read && */ + ops->peer_spad_write && + 1; +} + +/** + * struct ntb_client - client interested in ntb devices + * @drv: Linux driver object. + * @ops: See &ntb_client_ops. + */ +struct ntb_client { + struct device_driver drv; + const struct ntb_client_ops ops; +}; + +#define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) + +/** + * struct ntb_device - ntb device + * @dev: Linux device object. + * @pdev: Pci device entry of the ntb. + * @topo: Detected topology of the ntb. + * @ops: See &ntb_dev_ops. + * @ctx: See &ntb_ctx_ops. + * @ctx_ops: See &ntb_ctx_ops. + */ +struct ntb_dev { + struct device dev; + struct pci_dev *pdev; + enum ntb_topo topo; + const struct ntb_dev_ops *ops; + void *ctx; + const struct ntb_ctx_ops *ctx_ops; + + /* private: */ + + /* synchronize setting, clearing, and calling ctx_ops */ + spinlock_t ctx_lock; + /* block unregister until device is fully released */ + struct completion released; +}; + +#define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) + +/** + * ntb_register_client() - register a client for interest in ntb devices + * @client: Client context. + * + * The client will be added to the list of clients interested in ntb devices. + * The client will be notified of any ntb devices that are not already + * associated with a client, or if ntb devices are registered later. + * + * Return: Zero if the client is registered, otherwise an error number. + */ +#define ntb_register_client(client) \ + __ntb_register_client((client), THIS_MODULE, KBUILD_MODNAME) + +int __ntb_register_client(struct ntb_client *client, struct module *mod, + const char *mod_name); + +/** + * ntb_unregister_client() - unregister a client for interest in ntb devices + * @client: Client context. + * + * The client will be removed from the list of clients interested in ntb + * devices. If any ntb devices are associated with the client, the client will + * be notified to remove those devices. + */ +void ntb_unregister_client(struct ntb_client *client); + +#define module_ntb_client(__ntb_client) \ + module_driver(__ntb_client, ntb_register_client, \ + ntb_unregister_client) + +/** + * ntb_register_device() - register a ntb device + * @ntb: NTB device context. + * + * The device will be added to the list of ntb devices. If any clients are + * interested in ntb devices, each client will be notified of the ntb device, + * until at most one client accepts the device. + * + * Return: Zero if the device is registered, otherwise an error number. + */ +int ntb_register_device(struct ntb_dev *ntb); + +/** + * ntb_register_device() - unregister a ntb device + * @ntb: NTB device context. + * + * The device will be removed from the list of ntb devices. If the ntb device + * is associated with a client, the client will be notified to remove the + * device. + */ +void ntb_unregister_device(struct ntb_dev *ntb); + +/** + * ntb_set_ctx() - associate a driver context with an ntb device + * @ntb: NTB device context. + * @ctx: Driver context. + * @ctx_ops: Driver context operations. + * + * Associate a driver context and operations with a ntb device. The context is + * provided by the client driver, and the driver may associate a different + * context with each ntb device. + * + * Return: Zero if the context is associated, otherwise an error number. + */ +int ntb_set_ctx(struct ntb_dev *ntb, void *ctx, + const struct ntb_ctx_ops *ctx_ops); + +/** + * ntb_clear_ctx() - disassociate any driver context from an ntb device + * @ntb: NTB device context. + * + * Clear any association that may exist between a driver context and the ntb + * device. + */ +void ntb_clear_ctx(struct ntb_dev *ntb); + +/** + * ntb_link_event() - notify driver context of a change in link status + * @ntb: NTB device context. + * + * Notify the driver context that the link status may have changed. The driver + * should call ntb_link_is_up() to get the current status. + */ +void ntb_link_event(struct ntb_dev *ntb); + +/** + * ntb_db_event() - notify driver context of a doorbell event + * @ntb: NTB device context. + * @vector: Interrupt vector number. + * + * Notify the driver context of a doorbell event. If hardware supports + * multiple interrupt vectors for doorbells, the vector number indicates which + * vector received the interrupt. The vector number is relative to the first + * vector used for doorbells, starting at zero, and must be less than + ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * doorbell bits need service, and ntb_db_vector_mask() to determine which of + * those bits are associated with the vector number. + */ +void ntb_db_event(struct ntb_dev *ntb, int vector); + +/** + * ntb_mw_count() - get the number of memory windows + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * + * Return: the number of memory windows. + */ +static inline int ntb_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->mw_count(ntb); +} + +/** + * ntb_mw_get_range() - get the range of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @base: OUT - the base address for mapping the memory window + * @size: OUT - the size for mapping the memory window + * @align: OUT - the base alignment for translating the memory window + * @align_size: OUT - the size alignment for translating the memory window + * + * Get the range of a memory window. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. The alignment and + * size may be used for translating the memory window, for the peer to access + * memory on the local system. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, + phys_addr_t *base, resource_size_t *size, + resource_size_t *align, resource_size_t *align_size) +{ + return ntb->ops->mw_get_range(ntb, idx, base, size, + align, align_size); +} + +/** + * ntb_mw_set_trans() - set the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * @addr: The dma address local memory to expose to the peer. + * @size: The size of the local memory to expose to the peer. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, + dma_addr_t addr, resource_size_t size) +{ + return ntb->ops->mw_set_trans(ntb, idx, addr, size); +} + +/** + * ntb_mw_clear_trans() - clear the translation of a memory window + * @ntb: NTB device context. + * @idx: Memory window number. + * + * Clear the translation of a memory window. The peer may no longer access + * local memory through the window. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +{ + if (!ntb->ops->mw_clear_trans) + return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, idx); +} + +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. + * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Set the translation of a memory window. The peer may access local memory + * through the window starting at the address, up to the size. The address + * must be aligned to the alignment specified by ntb_mw_get_range(). The size + * must be aligned to the size alignment specified by ntb_mw_get_range(). + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + +/** + * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using doorbells. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use doorbells, or One if it is not safe. + */ +static inline int ntb_db_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_is_unsafe) + return 0; + + return ntb->ops->db_is_unsafe(ntb); +} + +/** + * ntb_db_valid_mask() - get a mask of doorbell bits supported by the ntb + * @ntb: NTB device context. + * + * Hardware may support different number or arrangement of doorbell bits. + * + * Return: A mask of doorbell bits supported by the ntb. + */ +static inline u64 ntb_db_valid_mask(struct ntb_dev *ntb) +{ + return ntb->ops->db_valid_mask(ntb); +} + +/** + * ntb_db_vector_count() - get the number of doorbell interrupt vectors + * @ntb: NTB device context. + * + * Hardware may support different number of interrupt vectors. + * + * Return: The number of doorbell interrupt vectors. + */ +static inline int ntb_db_vector_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_vector_count) + return 1; + + return ntb->ops->db_vector_count(ntb); +} + +/** + * ntb_db_vector_mask() - get a mask of doorbell bits serviced by a vector + * @ntb: NTB device context. + * @vector: Doorbell vector number. + * + * Each interrupt vector may have a different number or arrangement of bits. + * + * Return: A mask of doorbell bits serviced by a vector. + */ +static inline u64 ntb_db_vector_mask(struct ntb_dev *ntb, int vector) +{ + if (!ntb->ops->db_vector_mask) + return ntb_db_valid_mask(ntb); + + return ntb->ops->db_vector_mask(ntb, vector); +} + +/** + * ntb_db_read() - read the local doorbell register + * @ntb: NTB device context. + * + * Read the local doorbell register, and return the bits that are set. + * + * Return: The bits currently set in the local doorbell register. + */ +static inline u64 ntb_db_read(struct ntb_dev *ntb) +{ + return ntb->ops->db_read(ntb); +} + +/** + * ntb_db_set() - set bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the local doorbell register, which may generate a local doorbell + * interrupt. Bits that were already set must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set) + return -EINVAL; + + return ntb->ops->db_set(ntb, db_bits); +} + +/** + * ntb_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell register, arming the bits for the next + * doorbell. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear(ntb, db_bits); +} + +/** + * ntb_db_read_mask() - read the local doorbell mask + * @ntb: NTB device context. + * + * Read the local doorbell mask register, and return the bits that are set. + * + * This is unusual, though hardware is likely to support it. + * + * Return: The bits currently set in the local doorbell mask register. + */ +static inline u64 ntb_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->db_read_mask(ntb); +} + +/** + * ntb_db_set_mask() - set bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the local doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_set_mask(ntb, db_bits); +} + +/** + * ntb_db_clear_mask() - clear bits in the local doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the local doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If a doorbell bit is already + * set at the time the mask is cleared, and the corresponding mask bit is + * changed from set to clear, then the ntb driver must ensure that + * ntb_db_event() is called. If the hardware does not generate the interrupt + * on clearing the mask bit, then the driver must call ntb_db_event() anyway. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->db_clear_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_addr() - address and size of the peer doorbell register + * @ntb: NTB device context. + * @db_addr: OUT - The address of the peer doorbell register. + * @db_size: OUT - The number of bytes to write the peer doorbell register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * The drivers may wish to ring the peer doorbell at the completion of memory + * copy operations. For efficiency, and to simplify ordering of operations + * between the dma memory copies and the ringing doorbell, the driver may + * append one additional dma memory copy with the doorbell register as the + * destination, after the memory copy operations. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_addr(struct ntb_dev *ntb, + phys_addr_t *db_addr, + resource_size_t *db_size) +{ + return ntb->ops->peer_db_addr(ntb, db_addr, db_size); +} + +/** + * ntb_peer_db_read() - read the peer doorbell register + * @ntb: NTB device context. + * + * Read the peer doorbell register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell register. + */ +static inline u64 ntb_peer_db_read(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_db_read) + return 0; + + return ntb->ops->peer_db_read(ntb); +} + +/** + * ntb_peer_db_set() - set bits in the peer doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to set. + * + * Set bits in the peer doorbell register, which may generate a peer doorbell + * interrupt. Bits that were already set must remain set. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +{ + return ntb->ops->peer_db_set(ntb, db_bits); +} + +/** + * ntb_peer_db_clear() - clear bits in the local doorbell register + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell register, arming the bits for the next + * doorbell. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear) + return -EINVAL; + + return ntb->ops->peer_db_clear(ntb, db_bits); +} + +/** + * ntb_peer_db_read_mask() - read the peer doorbell mask + * @ntb: NTB device context. + * + * Read the peer doorbell mask register, and return the bits that are set. + * + * This is unusual, and hardware may not support it. + * + * Return: The bits currently set in the peer doorbell mask register. + */ +static inline u64 ntb_peer_db_read_mask(struct ntb_dev *ntb) +{ + if (!ntb->ops->db_read_mask) + return 0; + + return ntb->ops->peer_db_read_mask(ntb); +} + +/** + * ntb_peer_db_set_mask() - set bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell mask bits to set. + * + * Set bits in the peer doorbell mask register, preventing doorbell interrupts + * from being generated for those doorbell bits. Bits that were already set + * must remain set. + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_set_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_set_mask) + return -EINVAL; + + return ntb->ops->peer_db_set_mask(ntb, db_bits); +} + +/** + * ntb_peer_db_clear_mask() - clear bits in the peer doorbell mask + * @ntb: NTB device context. + * @db_bits: Doorbell bits to clear. + * + * Clear bits in the peer doorbell mask register, allowing doorbell interrupts + * from being generated for those doorbell bits. If the hardware does not + * generate the interrupt on clearing the mask bit, then the driver should not + * implement this function! + * + * This is unusual, and hardware may not support it. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) +{ + if (!ntb->ops->db_clear_mask) + return -EINVAL; + + return ntb->ops->peer_db_clear_mask(ntb, db_bits); +} + +/** + * ntb_spad_is_unsafe() - check if it is safe to use the hardware scratchpads + * @ntb: NTB device context. + * + * It is possible for some ntb hardware to be affected by errata. Hardware + * drivers can advise clients to avoid using scratchpads. Clients may ignore + * this advice, though caution is recommended. + * + * Return: Zero if it is safe to use scratchpads, or One if it is not safe. + */ +static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) +{ + if (!ntb->ops->spad_is_unsafe) + return 0; + + return ntb->ops->spad_is_unsafe(ntb); +} + +/** + * ntb_mw_count() - get the number of scratchpads + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of scratchpads. + * + * Return: the number of scratchpads. + */ +static inline int ntb_spad_count(struct ntb_dev *ntb) +{ + return ntb->ops->spad_count(ntb); +} + +/** + * ntb_spad_read() - read the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the local scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->spad_read(ntb, idx); +} + +/** + * ntb_spad_write() - write the local scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the local scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->spad_write(ntb, idx, val); +} + +/** + * ntb_peer_spad_addr() - address of the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @spad_addr: OUT - The address of the peer scratchpad register. + * + * Return the address of the peer doorbell register. This may be used, for + * example, by drivers that offload memory copy operations to a dma engine. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, + phys_addr_t *spad_addr) +{ + return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); +} + +/** + * ntb_peer_spad_read() - read the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * + * Read the peer scratchpad register, and return the value. + * + * Return: The value of the local scratchpad register. + */ +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +{ + return ntb->ops->peer_spad_read(ntb, idx); +} + +/** + * ntb_peer_spad_write() - write the peer scratchpad register + * @ntb: NTB device context. + * @idx: Scratchpad index. + * @val: Scratchpad value. + * + * Write the value to the peer scratchpad register. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +{ + return ntb->ops->peer_spad_write(ntb, idx, val); +} + +#endif -- cgit v1.2.3 From e26a5843f7f5014ae4460030ca4de029a3ac35d3 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Thu, 9 Apr 2015 10:33:20 -0400 Subject: NTB: Split ntb_hw_intel and ntb_transport drivers Change ntb_hw_intel to use the new NTB hardware abstraction layer. Split ntb_transport into its own driver. Change it to use the new NTB hardware abstraction layer. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb_transport.h | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h index f78b64cc09d5..2862861366a5 100644 --- a/include/linux/ntb_transport.h +++ b/include/linux/ntb_transport.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. + * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,7 +42,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Intel PCIe NTB Linux driver + * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason @@ -48,21 +50,16 @@ struct ntb_transport_qp; -struct ntb_client { +struct ntb_transport_client { struct device_driver driver; - int (*probe)(struct pci_dev *pdev); - void (*remove)(struct pci_dev *pdev); + int (*probe)(struct device *client_dev); + void (*remove)(struct device *client_dev); }; -enum { - NTB_LINK_DOWN = 0, - NTB_LINK_UP, -}; - -int ntb_transport_register_client(struct ntb_client *drvr); -void ntb_transport_unregister_client(struct ntb_client *drvr); -int ntb_register_client_dev(char *device_name); -void ntb_unregister_client_dev(char *device_name); +int ntb_transport_register_client(struct ntb_transport_client *drvr); +void ntb_transport_unregister_client(struct ntb_transport_client *drvr); +int ntb_transport_register_client_dev(char *device_name); +void ntb_transport_unregister_client_dev(char *device_name); struct ntb_queue_handlers { void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, @@ -75,7 +72,7 @@ struct ntb_queue_handlers { unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp); struct ntb_transport_qp * -ntb_transport_create_queue(void *data, struct pci_dev *pdev, +ntb_transport_create_queue(void *data, struct device *client_dev, const struct ntb_queue_handlers *handlers); void ntb_transport_free_queue(struct ntb_transport_qp *qp); int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, -- cgit v1.2.3 From 0294112ee3135fbd15eaa70015af8283642dd970 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..fedfd1bea3bf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -309,9 +309,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -507,13 +504,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) -- cgit v1.2.3 From 26095a01d359827eeccec5459c28ddd976183179 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:20 +0200 Subject: ACPI / scan: Add support for ACPI _CLS device matching Device drivers typically use ACPI _HIDs/_CIDs listed in struct device_driver acpi_match_table to match devices. However, for generic drivers, we do not want to list _HID for all supported devices. Also, certain classes of devices do not have _CID (e.g. SATA, USB). Instead, we can leverage ACPI _CLS, which specifies PCI-defined class code (i.e. base-class, subclass and programming interface). This patch adds support for matching ACPI devices using the _CLS method. To support loadable module, current design uses _HID or _CID to match device's modalias. With the new way of matching with _CLS this would requires modification to the current ACPI modalias key to include _CLS. This patch appends PCI-defined class-code to the existing ACPI modalias as following. acpi::::..::: E.g: # cat /sys/devices/platform/AMDI0600:00/modalias acpi:AMDI0600:010601: where bb is th base-class code, ss is te sub-class code, and pp is the programming interface code Since there would not be _HID/_CID in the ACPI matching table of the driver, this patch adds a field to acpi_device_id to specify the matching _CLS. static const struct acpi_device_id ahci_acpi_match[] = { { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; In this case, the corresponded entry in modules.alias file would be: alias acpi*:010601:* ahci_platform Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 14 ++++++++++++++ include/linux/mod_devicetable.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..bdfdb9f65c23 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -446,6 +459,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 -- cgit v1.2.3 From f32dd117051185da6e923b35491a44d7debeeea5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:29:38 +0200 Subject: tick/broadcast: Make idle check independent from mode and config Currently the broadcast busy check, which prevents the idle code from going into deep idle, works only in one shot mode. If NOHZ and HIGHRES are off (config or command line) there is no sanity check at all, so under certain conditions cpus are allowed to go into deep idle, where the local timer stops, and are not woken up again because there is no broadcast timer installed or a hrtimer based broadcast device is not evaluated. Move tick_broadcast_oneshot_control() into the common code and provide proper subfunctions for the various config combinations. The common check in tick_broadcast_oneshot_control() is for the C3STOP misfeature flag of the local clock event device. If its not set, idle can proceed. If set, further checks are necessary. Provide checks for the trivial cases: - If broadcast is disabled in the config, then return busy - If oneshot mode (NOHZ/HIGHES) is disabled in the config, return busy if the broadcast device is hrtimer based. - If oneshot mode is enabled in the config call the original tick_broadcast_oneshot_control() function. That function needs extra checks which will be implemented in seperate patches. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- include/linux/tick.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..6916dcb61857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,11 +67,7 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); -#else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } -#endif static inline void tick_broadcast_enable(void) { -- cgit v1.2.3 From 37b64a42067a04a22468c4e52c12af00d72e462b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 21:56:34 +0200 Subject: tick/broadcast: Unbreak CONFIG_GENERIC_CLOCKEVENTS=n build Making tick_broadcast_oneshot_control() independent from CONFIG_GENERIC_CLOCKEVENTS_BROADCAST broke the build for CONFIG_GENERIC_CLOCKEVENTS=n because the function is not defined there. Provide a proper stub inline. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 6916dcb61857..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,7 +67,14 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} +#endif static inline void tick_broadcast_enable(void) { -- cgit v1.2.3 From a899418167264c7bac574b1a0f1b2c26c5b0995a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:30 +0000 Subject: hotplug: Prevent alloc/free of irq descriptors during cpu up/down When a cpu goes up some architectures (e.g. x86) have to walk the irq space to set up the vector space for the cpu. While this needs extra protection at the architecture level we can avoid a few race conditions by preventing the concurrent allocation/free of irq descriptors and the associated data. When a cpu goes down it moves the interrupts which are targeted to this cpu away by reassigning the affinities. While this happens interrupts can be allocated and freed, which opens a can of race conditions in the code which reassignes the affinities because interrupt descriptors might be freed underneath. Example: CPU1 CPU2 cpu_up/down irq_desc = irq_to_desc(irq); remove_from_radix_tree(desc); raw_spin_lock(&desc->lock); free(desc); We could protect the irq descriptors with RCU, but that would require a full tree change of all accesses to interrupt descriptors. But fortunately these kind of race conditions are rather limited to a few things like cpu hotplug. The normal setup/teardown is very well serialized. So the simpler and obvious solution is: Prevent allocation and freeing of interrupt descriptors accross cpu hotplug. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.063519515@linutronix.de --- include/linux/irqdesc.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif -- cgit v1.2.3 From 1f6823faa8c563431a94e614d2b63ce16bb6f658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: time: Get rid of do_posix_clock_monotonic_gettime All users gone. Remove it before we get another one. Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* -- cgit v1.2.3 From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, -- cgit v1.2.3 From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- include/linux/usb/cdc_ncm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c5..1f6526c76ee8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -- cgit v1.2.3 From d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping to be required by user space applications this timestamp was not generated which lead to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out of tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create an unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457a..51bb6532785c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; -- cgit v1.2.3