From 3dc59262f76241b342316bbac8b5ffe138995b2d Mon Sep 17 00:00:00 2001 From: Shrirang Bagul Date: Thu, 24 Nov 2016 13:33:43 +0800 Subject: iio: st_sensors: match sensors using ACPI handle Add support to match st sensors using information passed from ACPI DST tables. Signed-off-by: Shrirang Bagul Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors_i2c.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h index 1796af093368..254de3c7dde8 100644 --- a/include/linux/iio/common/st_sensors_i2c.h +++ b/include/linux/iio/common/st_sensors_i2c.h @@ -28,4 +28,13 @@ static inline void st_sensors_of_i2c_probe(struct i2c_client *client, } #endif +#ifdef CONFIG_ACPI +int st_sensors_match_acpi_device(struct device *dev); +#else +static inline int st_sensors_match_acpi_device(struct device *dev) +{ + return -ENODEV; +} +#endif + #endif /* ST_SENSORS_I2C_H */ -- cgit v1.2.3 From a96cd0f901eecd9589477cc2cd46bdb4f1f3e49a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 28 Nov 2016 14:41:16 -0800 Subject: iio: accel: hid-sensor-accel-3d: Add timestamp Added timestamp channel. With this change, each sample has a timestamp. This timestamp can be from the sensor hub when present or local kernel timestamp. HID sensors can send timestamp with input data using usage id HID_USAGE_SENSOR_TIME_TIMESTAMP. This timestamp value is converted to nano seconds before pushing this sample to the iio core. Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 4 ++++ include/linux/hid-sensor-ids.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index dd85f3503410..7ef111d3ecc5 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -232,6 +232,7 @@ struct hid_sensor_common { atomic_t data_ready; atomic_t user_requested_state; struct iio_trigger *trigger; + int timestamp_ns_scale; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; struct hid_sensor_hub_attribute_info power_state; @@ -271,4 +272,7 @@ int hid_sensor_format_scale(u32 usage_id, s32 hid_sensor_read_poll_value(struct hid_sensor_common *st); +int64_t hid_sensor_convert_timestamp(struct hid_sensor_common *st, + int64_t raw_value); + #endif diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index f2ee90aed0c2..9a3a9db1b39d 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -95,6 +95,7 @@ #define HID_USAGE_SENSOR_TIME_HOUR 0x200525 #define HID_USAGE_SENSOR_TIME_MINUTE 0x200526 #define HID_USAGE_SENSOR_TIME_SECOND 0x200527 +#define HID_USAGE_SENSOR_TIME_TIMESTAMP 0x200529 /* Units */ #define HID_USAGE_SENSOR_UNITS_NOT_SPECIFIED 0x00 -- cgit v1.2.3 From 122020af856181c24fe45903e43e3cc987c175f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Nov 2016 15:46:42 +0000 Subject: dma-buf: Provide wrappers for reservation's lock Joonas complained that writing ww_mutex_lock(&resv->lock, ctx) was too intrusive compared to reservation_object_lock(resv, ctx); Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Joonas Lahtinen Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161115154642.31850-1-chris@chris-wilson.co.uk --- include/linux/reservation.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index d9706a6f5ae2..2b5a4679daea 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -144,6 +144,40 @@ reservation_object_get_list(struct reservation_object *obj) reservation_object_held(obj)); } +/** + * reservation_object_lock - lock the reservation object + * @obj: the reservation object + * @ctx: the locking context + * + * Locks the reservation object for exclusive access and modification. Note, + * that the lock is only against other writers, readers will run concurrently + * with a writer under RCU. The seqlock is used to notify readers if they + * overlap with a writer. + * + * As the reservation object may be locked by multiple parties in an + * undefined order, a #ww_acquire_ctx is passed to unwind if a cycle + * is detected. See ww_mutex_lock() and ww_acquire_init(). A reservation + * object may be locked by itself by passing NULL as @ctx. + */ +static inline int +reservation_object_lock(struct reservation_object *obj, + struct ww_acquire_ctx *ctx) +{ + return ww_mutex_lock(&obj->lock, ctx); +} + +/** + * reservation_object_unlock - unlock the reservation object + * @obj: the reservation object + * + * Unlocks the reservation object following exclusive access. + */ +static inline void +reservation_object_unlock(struct reservation_object *obj) +{ + ww_mutex_unlock(&obj->lock); +} + /** * reservation_object_get_excl - get the reservation object's * exclusive fence, with update-side lock held -- cgit v1.2.3 From 2904a8c1311f02896635fd35744262413a0b2726 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 9 Dec 2016 19:53:07 +0100 Subject: dma-buf: Reorganize device dma access docs - Put the initial overview for dma-buf into dma-buf.rst. - Put all the comments about detailed semantics into the right kernel-doc comment for functions or ops structure member. - To allow that detail, switch the reworked kerneldoc to inline style for dma_buf_ops. - Tie everything together into a much more streamlined overview comment, relying on the hyperlinks for all the details. - Also sprinkle some links into the kerneldoc for dma_buf and dma_buf_attachment to tie it all together. Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Cc: Sumit Semwal Signed-off-by: Daniel Vetter Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-4-daniel.vetter@ffwll.ch --- include/linux/dma-buf.h | 133 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 110 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8daeb3ce0016..6df170fb243f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,19 +39,6 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @attach: [optional] allows different devices to 'attach' themselves to the - * given buffer. It might return -EBUSY to signal that backing storage - * is already allocated and incompatible with the requirements - * of requesting device. - * @detach: [optional] detach a given device from this buffer. - * @map_dma_buf: returns list of scatter pages allocated, increases usecount - * of the buffer. Requires atleast one attach to be called - * before. Returned sg list should already be mapped into - * _device_ address space. This call may sleep. May also return - * -EINTR. Should return -EINVAL if attach hasn't been called yet. - * @unmap_dma_buf: decreases usecount of buffer, might deallocate scatter - * pages. - * @release: release this buffer; to be called after the last dma_buf_put. * @begin_cpu_access: [optional] called before cpu access to invalidate cpu * caches and allocate backing storage (if not yet done) * respectively pin the object into memory. @@ -72,25 +59,109 @@ struct dma_buf_attachment; * @vunmap: [optional] unmaps a vmap from the buffer */ struct dma_buf_ops { + /** + * @attach: + * + * This is called from dma_buf_attach() to make sure that a given + * &device can access the provided &dma_buf. Exporters which support + * buffer objects in special locations like VRAM or device-specific + * carveout areas should check whether the buffer could be move to + * system memory (or directly accessed by the provided device), and + * otherwise need to fail the attach operation. + * + * The exporter should also in general check whether the current + * allocation fullfills the DMA constraints of the new device. If this + * is not the case, and the allocation cannot be moved, it should also + * fail the attach operation. + * + * Any exporter-private housekeeping data can be stored in the priv + * pointer of &dma_buf_attachment structure. + * + * This callback is optional. + * + * Returns: + * + * 0 on success, negative error code on failure. It might return -EBUSY + * to signal that backing storage is already allocated and incompatible + * with the requirements of requesting device. + */ int (*attach)(struct dma_buf *, struct device *, - struct dma_buf_attachment *); + struct dma_buf_attachment *); + /** + * @detach: + * + * This is called by dma_buf_detach() to release a &dma_buf_attachment. + * Provided so that exporters can clean up any housekeeping for an + * &dma_buf_attachment. + * + * This callback is optional. + */ void (*detach)(struct dma_buf *, struct dma_buf_attachment *); - /* For {map,unmap}_dma_buf below, any specific buffer attributes - * required should get added to device_dma_parameters accessible - * via dev->dma_params. + /** + * @map_dma_buf: + * + * This is called by dma_buf_map_attachment() and is used to map a + * shared &dma_buf into device address space, and it is mandatory. It + * can only be called if @attach has been called successfully. This + * essentially pins the DMA buffer into place, and it cannot be moved + * any more + * + * This call may sleep, e.g. when the backing storage first needs to be + * allocated, or moved to a location suitable for all currently attached + * devices. + * + * Note that any specific buffer attributes required for this function + * should get added to device_dma_parameters accessible via + * device->dma_params from the &dma_buf_attachment. The @attach callback + * should also check these constraints. + * + * If this is being called for the first time, the exporter can now + * choose to scan through the list of attachments for this buffer, + * collate the requirements of the attached devices, and choose an + * appropriate backing storage for the buffer. + * + * Based on enum dma_data_direction, it might be possible to have + * multiple users accessing at the same time (for reading, maybe), or + * any other kind of sharing that the exporter might wish to make + * available to buffer-users. + * + * Returns: + * + * A &sg_table scatter list of or the backing storage of the DMA buffer, + * already mapped into the device address space of the &device attached + * with the provided &dma_buf_attachment. + * + * On failure, returns a negative error value wrapped into a pointer. + * May also return -EINTR when a signal was received while being + * blocked. */ struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *, - enum dma_data_direction); + enum dma_data_direction); + /** + * @unmap_dma_buf: + * + * This is called by dma_buf_unmap_attachment() and should unmap and + * release the &sg_table allocated in @map_dma_buf, and it is mandatory. + * It should also unpin the backing storage if this is the last mapping + * of the DMA buffer, it the exporter supports backing storage + * migration. + */ void (*unmap_dma_buf)(struct dma_buf_attachment *, - struct sg_table *, - enum dma_data_direction); + struct sg_table *, + enum dma_data_direction); + /* TODO: Add try_map_dma_buf version, to return immed with -EBUSY * if the call would block. */ - /* after final dma_buf_put() */ + /** + * @release: + * + * Called after the last dma_buf_put to release the &dma_buf, and + * mandatory. + */ void (*release)(struct dma_buf *); int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); @@ -124,6 +195,15 @@ struct dma_buf_ops { * @poll: for userspace poll support * @cb_excl: for userspace poll support * @cb_shared: for userspace poll support + * + * This represents a shared buffer, created by calling dma_buf_export(). The + * userspace representation is a normal file descriptor, which can be created by + * calling dma_buf_fd(). + * + * Shared dma buffers are reference counted using dma_buf_put() and + * get_dma_buf(). + * + * Device DMA access is handled by the separate struct &dma_buf_attachment. */ struct dma_buf { size_t size; @@ -160,6 +240,11 @@ struct dma_buf { * This structure holds the attachment information between the dma_buf buffer * and its user device(s). The list contains one attachment struct per device * attached to the buffer. + * + * An attachment is created by calling dma_buf_attach(), and released again by + * calling dma_buf_detach(). The DMA mapping itself needed to initiate a + * transfer is created by dma_buf_map_attachment() and freed again by calling + * dma_buf_unmap_attachment(). */ struct dma_buf_attachment { struct dma_buf *dmabuf; @@ -192,9 +277,11 @@ struct dma_buf_export_info { }; /** - * helper macro for exporters; zeros and fills in most common values - * + * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters * @name: export-info name + * + * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info, + * zeroes it out and pre-populates exp_name in it. */ #define DEFINE_DMA_BUF_EXPORT_INFO(name) \ struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ -- cgit v1.2.3 From 0959a1683d78270bab6381d498707fb8655ae11c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 9 Dec 2016 19:53:08 +0100 Subject: dma-buf: Update cpu access documentation - Again move the information relevant for driver writers next to the callbacks. - Put the overview and userspace interface documentation into a DOC: section within the code. - Remove the text that mmap needs to be coherent - since the DMA_BUF_IOCTL_SYNC landed that's no longer the case. But keep the text that for pte zapping exporters need to adjust the address space. - Add a FIXME that kmap and the new begin/end stuff used by the SYNC ioctl don't really mix correctly. That's something I just realized while doing this doc rework. - Augment function and structure docs like usual. Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Cc: Sumit Semwal Signed-off-by: Daniel Vetter Signed-off-by: Sumit Semwal [sumits: fix cosmetic issues] Link: http://patchwork.freedesktop.org/patch/msgid/20161209185309.1682-5-daniel.vetter@ffwll.ch --- include/linux/dma-buf.h | 91 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6df170fb243f..57828154e440 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,10 +39,6 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @begin_cpu_access: [optional] called before cpu access to invalidate cpu - * caches and allocate backing storage (if not yet done) - * respectively pin the object into memory. - * @end_cpu_access: [optional] called after cpu access to flush caches. * @kmap_atomic: maps a page from the buffer into kernel address * space, users may not block until the subsequent unmap call. * This callback must not sleep. @@ -50,10 +46,6 @@ struct dma_buf_attachment; * This Callback must not sleep. * @kmap: maps a page from the buffer into kernel address space. * @kunmap: [optional] unmaps a page from the buffer. - * @mmap: used to expose the backing storage to userspace. Note that the - * mapping needs to be coherent - if the exporter doesn't directly - * support this, it needs to fake coherency by shooting down any ptes - * when transitioning away from the cpu domain. * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. * @vunmap: [optional] unmaps a vmap from the buffer @@ -164,13 +156,96 @@ struct dma_buf_ops { */ void (*release)(struct dma_buf *); + /** + * @begin_cpu_access: + * + * This is called from dma_buf_begin_cpu_access() and allows the + * exporter to ensure that the memory is actually available for cpu + * access - the exporter might need to allocate or swap-in and pin the + * backing storage. The exporter also needs to ensure that cpu access is + * coherent for the access direction. The direction can be used by the + * exporter to optimize the cache flushing, i.e. access with a different + * direction (read instead of write) might return stale or even bogus + * data (e.g. when the exporter needs to copy the data to temporary + * storage). + * + * This callback is optional. + * + * FIXME: This is both called through the DMA_BUF_IOCTL_SYNC command + * from userspace (where storage shouldn't be pinned to avoid handing + * de-factor mlock rights to userspace) and for the kernel-internal + * users of the various kmap interfaces, where the backing storage must + * be pinned to guarantee that the atomic kmap calls can succeed. Since + * there's no in-kernel users of the kmap interfaces yet this isn't a + * real problem. + * + * Returns: + * + * 0 on success or a negative error code on failure. This can for + * example fail when the backing storage can't be allocated. Can also + * return -ERESTARTSYS or -EINTR when the call has been interrupted and + * needs to be restarted. + */ int (*begin_cpu_access)(struct dma_buf *, enum dma_data_direction); + + /** + * @end_cpu_access: + * + * This is called from dma_buf_end_cpu_access() when the importer is + * done accessing the CPU. The exporter can use this to flush caches and + * unpin any resources pinned in @begin_cpu_access. + * The result of any dma_buf kmap calls after end_cpu_access is + * undefined. + * + * This callback is optional. + * + * Returns: + * + * 0 on success or a negative error code on failure. Can return + * -ERESTARTSYS or -EINTR when the call has been interrupted and needs + * to be restarted. + */ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); void *(*kmap_atomic)(struct dma_buf *, unsigned long); void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *); void *(*kmap)(struct dma_buf *, unsigned long); void (*kunmap)(struct dma_buf *, unsigned long, void *); + /** + * @mmap: + * + * This callback is used by the dma_buf_mmap() function + * + * Note that the mapping needs to be incoherent, userspace is expected + * to braket CPU access using the DMA_BUF_IOCTL_SYNC interface. + * + * Because dma-buf buffers have invariant size over their lifetime, the + * dma-buf core checks whether a vma is too large and rejects such + * mappings. The exporter hence does not need to duplicate this check. + * Drivers do not need to check this themselves. + * + * If an exporter needs to manually flush caches and hence needs to fake + * coherency for mmap support, it needs to be able to zap all the ptes + * pointing at the backing storage. Now linux mm needs a struct + * address_space associated with the struct file stored in vma->vm_file + * to do that with the function unmap_mapping_range. But the dma_buf + * framework only backs every dma_buf fd with the anon_file struct file, + * i.e. all dma_bufs share the same file. + * + * Hence exporters need to setup their own file (and address_space) + * association by setting vma->vm_file and adjusting vma->vm_pgoff in + * the dma_buf mmap callback. In the specific case of a gem driver the + * exporter could use the shmem file already provided by gem (and set + * vm_pgoff = 0). Exporters can then zap ptes by unmapping the + * corresponding range of the struct address_space associated with their + * own file. + * + * This callback is optional. + * + * Returns: + * + * 0 on success or a negative error code on failure. + */ int (*mmap)(struct dma_buf *, struct vm_area_struct *vma); void *(*vmap)(struct dma_buf *); -- cgit v1.2.3 From 409c69be433b819c924a8d1c457a835bc6d51700 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 10 Dec 2016 11:51:11 +0200 Subject: ASoC: samsung: Remove tests of member address The driver was checking for non-NULL address of struct's members: - s3c_audio_pdata->type (union), - s3c_audio_pdata->type.i2s (embedded struct). This is pointless as these will be always non-NULL. The 's3c_audio_pdata' is always initialized in static memory so it will be zeroed. Additionally the 'type' member was an union with only one member. It is safe to reorganize the structures to get rid of useless union and checks for addresses to fix the coccinelle warning: >> sound/soc/samsung/i2s.c:1270:2-4: ERROR: test of a variable/field address Reported-by: kbuild test robot Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Mark Brown --- include/linux/platform_data/asoc-s3c.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index 15bf56ee8af7..90641a5daaf0 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -18,7 +18,7 @@ extern void s3c64xx_ac97_setup_gpio(int); -struct samsung_i2s { +struct samsung_i2s_type { /* If the Primary DAI has 5.1 Channels */ #define QUIRK_PRI_6CHAN (1 << 0) /* If the I2S block has a Stereo Overlay Channel */ @@ -47,7 +47,5 @@ struct s3c_audio_pdata { void *dma_capture; void *dma_play_sec; void *dma_capture_mic; - union { - struct samsung_i2s i2s; - } type; + struct samsung_i2s_type type; }; -- cgit v1.2.3 From e8314d7d53c8b050aac2828a5de5f28a997b468b Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Tue, 6 Dec 2016 20:22:36 +0100 Subject: misc: atmel-ssc: register as sound DAI if #sound-dai-cells is present The SSC is currently not usable with the ASoC simple-audio-card, as every SSC audio user has to build a platform driver that may do as little as calling atmel_ssc_set_audio/atmel_ssc_put_audio (which allocates the SSC and registers a DAI with the ASoC subsystem). So, have that happen automatically, if the #sound-dai-cells property is present in devicetree, which it has to be anyway for simple audio card to work. Signed-off-by: Peter Rosin Acked-by: Rob Herring Acked-by: Nicolas Ferre Signed-off-by: Mark Brown --- include/linux/atmel-ssc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h index 7c0f6549898b..fdb545101ede 100644 --- a/include/linux/atmel-ssc.h +++ b/include/linux/atmel-ssc.h @@ -20,6 +20,7 @@ struct ssc_device { int user; int irq; bool clk_from_rk_pin; + bool sound_dai; }; struct ssc_device * __must_check ssc_request(unsigned int ssc_num); -- cgit v1.2.3 From 56e3d1cd05cc7b24cfcae8714b0661bf607aaba3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 15 Dec 2016 06:01:10 +0100 Subject: kref: prefer atomic_inc_not_zero to atomic_add_unless On most platforms, there exists this ifdef: #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) This makes this patch functionally useless. However, on PPC, there is actually an explicit definition of atomic_inc_not_zero with its own assembly that is slightly more optimized than atomic_add_unless. So, this patch changes kref to use atomic_inc_not_zero instead, for PPC and any future platforms that might provide an explicit implementation. This also puts this usage of kref more in line with a verbatim reading of the examples in Paul McKenney's paper [1] in the section titled "2.4 Atomic Counting With Check and Release Memory Barrier", which uses atomic_inc_not_zero. [1] http://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2167.pdf Signed-off-by: Jason A. Donenfeld Reviewed-by: Thomas Hellstrom Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161215050110.3241-1-Jason@zx2c4.com --- include/linux/kref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..62f0a84ae94e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -133,6 +133,6 @@ static inline int kref_put_mutex(struct kref *kref, */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { - return atomic_add_unless(&kref->refcount, 1, 0); + return atomic_inc_not_zero(&kref->refcount); } #endif /* _KREF_H_ */ -- cgit v1.2.3 From fd50d71f94fb1c8614098949db068cd4c8dbb91d Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Tue, 13 Dec 2016 15:51:13 +0100 Subject: hwrng: core - Move hwrng miscdev minor number to include/linux/miscdevice.h This patch move the define for hwrng's miscdev minor number to include/linux/miscdevice.h. It's better that all minor number are in the same place. Rename it to HWRNG_MINOR (from RNG_MISCDEV_MINOR) in he process since no other miscdev define have MISCDEV in their name. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index ed30d5d713e3..5d81f739aa0a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -31,6 +31,7 @@ #define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 +#define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 #define VFIO_MINOR 196 -- cgit v1.2.3 From cf4a7207b1cb4a3c3fe3aa11a83c9d673722a7f5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 22 Dec 2016 14:45:14 +0000 Subject: lib: Add a simple prime number generator Prime numbers are interesting for testing components that use multiplies and divides, such as testing DRM's struct drm_mm alignment computations. v2: Move to lib/, add selftest v3: Fix initial constants (exclude 0/1 from being primes) v4: More RCU markup to keep 0day/sparse happy v5: Fix RCU unwind on module exit, add to kselftests v6: Tidy computation of bitmap size v7: for_each_prime_number_from() v8: Compose small-primes using BIT() for easier verification v9: Move rcu dance entirely into callers. v10: Improve quote for Betrand's Postulate (aka Chebyshev's theorem) Signed-off-by: Chris Wilson Cc: Lukas Wunner Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161222144514.3911-1-chris@chris-wilson.co.uk --- include/linux/prime_numbers.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/prime_numbers.h (limited to 'include/linux') diff --git a/include/linux/prime_numbers.h b/include/linux/prime_numbers.h new file mode 100644 index 000000000000..14ec4f567342 --- /dev/null +++ b/include/linux/prime_numbers.h @@ -0,0 +1,37 @@ +#ifndef __LINUX_PRIME_NUMBERS_H +#define __LINUX_PRIME_NUMBERS_H + +#include + +bool is_prime_number(unsigned long x); +unsigned long next_prime_number(unsigned long x); + +/** + * for_each_prime_number - iterate over each prime upto a value + * @prime: the current prime number in this iteration + * @max: the upper limit + * + * Starting from the first prime number 2 iterate over each prime number up to + * the @max value. On each iteration, @prime is set to the current prime number. + * @max should be less than ULONG_MAX to ensure termination. To begin with + * @prime set to 1 on the first iteration use for_each_prime_number_from() + * instead. + */ +#define for_each_prime_number(prime, max) \ + for_each_prime_number_from((prime), 2, (max)) + +/** + * for_each_prime_number_from - iterate over each prime upto a value + * @prime: the current prime number in this iteration + * @from: the initial value + * @max: the upper limit + * + * Starting from @from iterate over each successive prime number up to the + * @max value. On each iteration, @prime is set to the current prime number. + * @max should be less than ULONG_MAX, and @from less than @max, to ensure + * termination. + */ +#define for_each_prime_number_from(prime, from, max) \ + for (prime = (from); prime <= (max); prime = next_prime_number(prime)) + +#endif /* !__LINUX_PRIME_NUMBERS_H */ -- cgit v1.2.3 From a1d82aff5df760d933b6ea3a03805dbc2bd73eb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Dec 2016 14:49:02 -0500 Subject: kernfs: make kernfs_open_file->mmapped a bitfield More kernfs_open_file->mutex synchronized flags are planned to be added. Convert ->mmapped to a bitfield in preparation. While at it, make kernfs_fop_mmap() use "true" instead of "1" on ->mmapped. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Acked-by: Zefan Li --- include/linux/kernfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 7056238fd9f5..afd4e5abc4fb 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -185,7 +185,7 @@ struct kernfs_open_file { char *prealloc_buf; size_t atomic_write_len; - bool mmapped; + bool mmapped:1; const struct vm_operations_struct *vm_ops; }; -- cgit v1.2.3 From 0e67db2f9fe91937e798e3d7d22c50a8438187e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Dec 2016 14:49:03 -0500 Subject: kernfs: add kernfs_ops->open/release() callbacks Add ->open/release() methods to kernfs_ops. ->open() is called when the file is opened and ->release() when the file is either released or severed. These callbacks can be used, for example, to manage persistent caching objects over multiple seq_file iterations. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Acked-by: Zefan Li --- include/linux/kernfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index afd4e5abc4fb..a9b11b8d06f2 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -46,6 +46,7 @@ enum kernfs_node_flag { KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, + KERNFS_HAS_RELEASE = 0x2000, }; /* @flags for kernfs_create_root() */ @@ -175,6 +176,7 @@ struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; + struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ @@ -186,10 +188,18 @@ struct kernfs_open_file { size_t atomic_write_len; bool mmapped:1; + bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { + /* + * Optional open/release methods. Both are called with + * @of->seq_file populated. + */ + int (*open)(struct kernfs_open_file *of); + void (*release)(struct kernfs_open_file *of); + /* * Read is handled by either seq_file or raw_read(). * -- cgit v1.2.3 From e90cbebc3fa5caea4c8bfeb0d0157a0cee53efc7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Dec 2016 14:49:03 -0500 Subject: cgroup add cftype->open/release() callbacks Pipe the newly added kernfs->open/release() callbacks through cftype. While at it, as cleanup operations now can be performed from ->release() instead of ->seq_stop(), make the latter optional. Signed-off-by: Tejun Heo Acked-by: Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 861b4677fc5b..8a916dc96e84 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -388,6 +388,9 @@ struct cftype { struct list_head node; /* anchored at ss->cfts */ struct kernfs_ops *kf_ops; + int (*open)(struct kernfs_open_file *of); + void (*release)(struct kernfs_open_file *of); + /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() -- cgit v1.2.3 From 5f617ebbdf10abd49312a89e3b894b927c7367f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Dec 2016 14:49:05 -0500 Subject: cgroup: reorder css_set fields Reorder css_set fields so that they're roughly in the order of how hot they are. The rough order is 1. the actual csses 2. reference counter and the default cgroup pointer. 3. task lists and iterations 4. fields used during merge including css_set lookup 5. the rest Signed-off-by: Tejun Heo Acked-by: Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 54 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8a916dc96e84..3c02404cfce9 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -148,14 +148,18 @@ struct cgroup_subsys_state { * set for a task. */ struct css_set { - /* Reference count */ - atomic_t refcount; - /* - * List running through all cgroup groups in the same hash - * slot. Protected by css_set_lock + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). */ - struct hlist_node hlist; + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* reference count */ + atomic_t refcount; + + /* the default cgroup associated with this css_set */ + struct cgroup *dfl_cgrp; /* * Lists running through all tasks using this cgroup group. @@ -167,21 +171,29 @@ struct css_set { struct list_head tasks; struct list_head mg_tasks; + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; + /* - * List of cgrp_cset_links pointing at cgroups referenced from this - * css_set. Protected by css_set_lock. + * On the default hierarhcy, ->subsys[ssid] may point to a css + * attached to an ancestor instead of the cgroup this css_set is + * associated with. The following node is anchored at + * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to + * iterate through all css's attached to a given cgroup. */ - struct list_head cgrp_links; + struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - /* the default cgroup associated with this css_set */ - struct cgroup *dfl_cgrp; + /* + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock + */ + struct hlist_node hlist; /* - * Set of subsystem states, one for each subsystem. This array is - * immutable after creation apart from the init_css_set during - * subsystem registration (at boot time). + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + struct list_head cgrp_links; /* * List of csets participating in the on-going migration either as @@ -201,18 +213,6 @@ struct css_set { struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; - /* - * On the default hierarhcy, ->subsys[ssid] may point to a css - * attached to an ancestor instead of the cgroup this css_set is - * associated with. The following node is anchored at - * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to - * iterate through all css's attached to a given cgroup. - */ - struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; - - /* all css_task_iters currently walking this cset */ - struct list_head task_iters; - /* dead and being drained, ignore for migration */ bool dead; -- cgit v1.2.3 From 7b4632f048415263669676dda20fd5d811c3d3e4 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 24 Dec 2016 23:28:35 +0800 Subject: cgroup: fix a comment typo Fix a comment typo in cgroup.h. Signed-off-by: Geliang Tang Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c83c23f0577b..f6b43fbb141c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -266,7 +266,7 @@ void css_task_iter_end(struct css_task_iter *it); * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset * @leader: the loop cursor * @dst_css: the destination css - * @tset: takset to iterate + * @tset: taskset to iterate * * Iterate threadgroup leaders of @tset. For single-task migrations, @tset * may not contain any. -- cgit v1.2.3 From 3d48b53fb2ae37158e700ffef3f45461ff15c965 Mon Sep 17 00:00:00 2001 From: Matthias Tafelmeier Date: Thu, 29 Dec 2016 21:37:21 +0100 Subject: net: dev_weight: TX/RX orthogonality Oftenly, introducing side effects on packet processing on the other half of the stack by adjusting one of TX/RX via sysctl is not desirable. There are cases of demand for asymmetric, orthogonal configurability. This holds true especially for nodes where RPS for RFS usage on top is configured and therefore use the 'old dev_weight'. This is quite a common base configuration setup nowadays, even with NICs of superior processing support (e.g. aRFS). A good example use case are nodes acting as noSQL data bases with a large number of tiny requests and rather fewer but large packets as responses. It's affordable to have large budget and rx dev_weights for the requests. But as a side effect having this large a number on TX processed in one run can overwhelm drivers. This patch therefore introduces an independent configurability via sysctl to userland. Signed-off-by: Matthias Tafelmeier Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 994f7423a74b..ecd78b3c9aba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3795,6 +3795,10 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; +extern int dev_weight_rx_bias; +extern int dev_weight_tx_bias; +extern int dev_rx_weight; +extern int dev_tx_weight; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, -- cgit v1.2.3 From f641d3b536e559dd2bae9245ec2e7d86cdf623fd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 29 Dec 2016 21:48:24 +0100 Subject: dma-buf: use preferred struct reference in kernel-doc sed -e 's/\( \* .*\)struct &\([_a-z]*\)/\1\&struct \2/' -i Originally I wasnt a friend of this style because I thought a line-break between the "&struct" and "foo" part would break it. But a quick test shows that " * &struct \n * foo\n" works pefectly well with current kernel-doc. So time to mass-apply these changes! Cc: Sumit Semwal Cc: Jani Nikula Cc: Chris Wilson Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-4-git-send-email-daniel.vetter@ffwll.ch --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 57828154e440..4d61fc55278b 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -278,7 +278,7 @@ struct dma_buf_ops { * Shared dma buffers are reference counted using dma_buf_put() and * get_dma_buf(). * - * Device DMA access is handled by the separate struct &dma_buf_attachment. + * Device DMA access is handled by the separate &struct dma_buf_attachment. */ struct dma_buf { size_t size; @@ -355,7 +355,7 @@ struct dma_buf_export_info { * DEFINE_DMA_BUF_EXPORT_INFO - helper macro for exporters * @name: export-info name * - * DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct &dma_buf_export_info, + * DEFINE_DMA_BUF_EXPORT_INFO macro defines the &struct dma_buf_export_info, * zeroes it out and pre-populates exp_name in it. */ #define DEFINE_DMA_BUF_EXPORT_INFO(name) \ -- cgit v1.2.3 From e9b4d7b56f293ed4de9ff7d16759d33492f83180 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 29 Dec 2016 21:48:25 +0100 Subject: dma-buf: Use recommended structure member reference I just learned that &struct_name.member_name works and looks pretty even. It doesn't (yet) link to the member directly though, which would be really good for big structures or vfunc tables (where the per-member kerneldoc tends to be long). Cc: Sumit Semwal Cc: Jani Nikula Cc: Chris Wilson Reviewed-by: David Herrmann Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1483044517-5770-5-git-send-email-daniel.vetter@ffwll.ch --- include/linux/dma-buf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 4d61fc55278b..bfb3704fc6fc 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -66,8 +66,8 @@ struct dma_buf_ops { * is not the case, and the allocation cannot be moved, it should also * fail the attach operation. * - * Any exporter-private housekeeping data can be stored in the priv - * pointer of &dma_buf_attachment structure. + * Any exporter-private housekeeping data can be stored in the + * &dma_buf_attachment.priv pointer. * * This callback is optional. * @@ -106,7 +106,7 @@ struct dma_buf_ops { * * Note that any specific buffer attributes required for this function * should get added to device_dma_parameters accessible via - * device->dma_params from the &dma_buf_attachment. The @attach callback + * &device.dma_params from the &dma_buf_attachment. The @attach callback * should also check these constraints. * * If this is being called for the first time, the exporter can now -- cgit v1.2.3 From 58ae74683ae2c07cd717a91799edb50231061938 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 19 Dec 2016 12:25:32 +0100 Subject: fscrypt: factor out bio specific functions That way we can get rid of the direct dependency on CONFIG_BLOCK. Fixes: d475a507457b ("ubifs: Add skeleton for fscrypto") Reported-by: Arnd Bergmann Reported-by: Randy Dunlap Reviewed-by: Eric Biggers Reviewed-by: Christoph Hellwig Reviewed-by: David Gstir Signed-off-by: Richard Weinberger Signed-off-by: Theodore Ts'o --- include/linux/fscrypto.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index c074b670aa99..2a2815702095 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -174,11 +174,8 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, u64, gfp_t); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); -extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); -extern void fscrypt_pullback_bio_page(struct page **, bool); extern void fscrypt_restore_control_page(struct page *); -extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, - unsigned int); + /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); extern int fscrypt_ioctl_get_policy(struct file *, void __user *); @@ -201,6 +198,12 @@ extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, const struct fscrypt_str *, struct fscrypt_str *); extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, struct fscrypt_str *); + +/* bio.c */ +extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, + unsigned int); #endif /* crypto.c */ -- cgit v1.2.3 From e8f1cb507d01205e03f69809af4347ed8ec9db5b Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 1 Jan 2017 13:57:00 +0200 Subject: qed*: Update to dual-license Since the submission of the qedr driver, there's inconsistency in the licensing of the various qed/qede files - some are GPLv2 and some are dual-license. Since qedr requires dual-license and it's dependent on both, we're updating the licensing of all qed/qede source files. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 33 +++++++++++++++++++++++++++++---- include/linux/qed/eth_common.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/iscsi_common.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/qed_chain.h | 34 +++++++++++++++++++++++++++++----- include/linux/qed/qed_eth_if.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/qed_if.h | 35 +++++++++++++++++++++++++++++------ include/linux/qed/qed_iov_if.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/qed_iscsi_if.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/qed_ll2_if.h | 31 +++++++++++++++++++++++++++---- include/linux/qed/qed_roce_if.h | 2 +- include/linux/qed/qede_roce.h | 2 +- include/linux/qed/rdma_common.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/roce_common.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/storage_common.h | 32 ++++++++++++++++++++++++++++---- include/linux/qed/tcp_common.h | 32 ++++++++++++++++++++++++++++---- 15 files changed, 368 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 734deb094618..c33080baf38c 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -1,10 +1,35 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2016 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ + #ifndef _COMMON_HSI_H #define _COMMON_HSI_H #include diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 1aa0727c4136..4b402fb0eaad 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __ETH_COMMON__ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 8f64b1223c2f..4c5747babcf6 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __ISCSI_COMMON__ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 37dfba101c6c..5cd7a4608c9b 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation - * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_CHAIN_H diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 7a52f7c58c37..d91651ecfd26 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_ETH_IF_H diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 4b454f4f5b25..d1576a2bcfc9 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -1,10 +1,33 @@ /* QLogic qed NIC Driver - * - * Copyright (c) 2015 QLogic Corporation - * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_IF_H diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index 5a4f8d0899e9..b1f979135f97 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_IOV_IF_H diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index d27912480cb3..f70bb81b8b6a 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_ISCSI_IF_H diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index fd75c265dba3..4fb4666ea879 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -1,10 +1,33 @@ /* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation * - * Copyright (c) 2015 QLogic Corporation + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef _QED_LL2_IF_H diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index 53047d3fa678..f742d4312c9d 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -1,5 +1,5 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015-2016 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h index f48d64b0e2fb..3b8dd551a98c 100644 --- a/include/linux/qed/qede_roce.h +++ b/include/linux/qed/qede_roce.h @@ -1,5 +1,5 @@ /* QLogic qedr NIC Driver - * Copyright (c) 2015-2016 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 7663725faa94..f773aa5e746f 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __RDMA_COMMON__ diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index 2eeaf3dc6646..bad02df213df 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __ROCE_COMMON__ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 3b8e1efd9bc2..03f3e37ab059 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __STORAGE_COMMON__ diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index dc3889d1bbe6..46fe7856f1b2 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -1,9 +1,33 @@ /* QLogic qed NIC Driver - * Copyright (c) 2015 QLogic Corporation + * Copyright (c) 2015-2017 QLogic Corporation * - * This software is available under the terms of the GNU General Public License - * (GPL) Version 2, available from the file COPYING in the main directory of - * this source tree. + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #ifndef __TCP_COMMON__ -- cgit v1.2.3 From f29ffdb65ff0eaf95d2a2b80f0dee3fbd5a64772 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 1 Jan 2017 13:57:07 +0200 Subject: qed*: RSS indirection based on queue-handles A step toward having qede agnostic to the queue configurations in firmware/hardware - let the RSS indirections use queue handles instead of actual queue indices. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index d91651ecfd26..3613d63cd5d0 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -77,7 +77,7 @@ struct qed_dev_eth_info { }; struct qed_update_vport_rss_params { - u16 rss_ind_table[128]; + void *rss_ind_table[128]; u32 rss_key[10]; u8 rss_caps; }; -- cgit v1.2.3 From f990c82c385b1d9ce6acadb668df313c693cf48f Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 1 Jan 2017 13:57:08 +0200 Subject: qed*: Add support for ndo_set_vf_trust Trusted VFs would be allowed to receive promiscuous and multicast promiscuous data. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_iov_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iov_if.h b/include/linux/qed/qed_iov_if.h index b1f979135f97..ac2e6a3199a3 100644 --- a/include/linux/qed/qed_iov_if.h +++ b/include/linux/qed/qed_iov_if.h @@ -53,6 +53,8 @@ struct qed_iov_hv_ops { int (*set_rate) (struct qed_dev *cdev, int vfid, u32 min_rate, u32 max_rate); + + int (*set_trust) (struct qed_dev *cdev, int vfid, bool trust); }; #endif -- cgit v1.2.3 From 31b95c9bdc20663a20b3261303c2a5fc34aae133 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 30 Dec 2016 13:56:46 +0100 Subject: net: stmmac: remove unused duplicate property snps,axi_all For core revision 3.x Address-Aligned Beats is available in two registers. The DT property snps,aal was created for AAL in the DMA bus register, which is a read/write bit. The DT property snps,axi_all was created for AXI_AAL in the AXI bus mode register, which is a read only bit that reflects the value of AAL in the DMA bus register. Since the value of snps,axi_all is never used in the driver, and since the property was created for a bit that is read only, it should be safe to remove the property. Acked-by: Giuseppe Cavallaro Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 266dab9ad782..889e0e9a3f1c 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -103,7 +103,6 @@ struct stmmac_axi { u32 axi_wr_osr_lmt; u32 axi_rd_osr_lmt; bool axi_kbbe; - bool axi_axi_all; u32 axi_blen[AXI_BLEN]; bool axi_fb; bool axi_mb; -- cgit v1.2.3 From 7b13558f242747db90e52fdc510441a2ed0bafba Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 2 Jan 2017 11:37:38 +0200 Subject: net/mlx5: Fix offset naming for reserved fields in hca_cap_bits Fix offset for reserved fields. Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates") Fixes: b4ff3a36d3e4 ("net/mlx5: Use offset based reserved field names in the IFC header file") Fixes: 7d5e14237a55 ("net/mlx5: Update mlx5_ifc hardware features") Signed-off-by: Max Gurtovoy Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57bec544e20a..4792c856531e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -826,9 +826,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_1a9[0x2]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; - u8 reserved_at_1b0[0x1]; + u8 reserved_at_1b1[0x1]; u8 ports_check[0x1]; - u8 reserved_at_1b2[0x1]; + u8 reserved_at_1b3[0x1]; u8 disable_link_up[0x1]; u8 beacon_led[0x1]; u8 port_type[0x2]; @@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 compact_address_vector[0x1]; u8 striding_rq[0x1]; - u8 reserved_at_201[0x2]; + u8 reserved_at_202[0x2]; u8 ipoib_basic_offloads[0x1]; u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; @@ -1009,10 +1009,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 rndv_offload_rc[0x1]; u8 rndv_offload_dc[0x1]; u8 log_tag_matching_list_sz[0x5]; - u8 reserved_at_5e8[0x3]; + u8 reserved_at_5f8[0x3]; u8 log_max_xrq[0x5]; - u8 reserved_at_5f0[0x200]; + u8 reserved_at_600[0x200]; }; enum mlx5_flow_destination_type { -- cgit v1.2.3 From bcda1aca7712539439d53dd5351c6223e03bf6e1 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:41 +0200 Subject: net/mlx5: Support new MR features This patch adds the following items to IFC file. 1. MLX5_MKC_ACCESS_MODE_KSM enum value for creating KSM memory keys. KSM access mode used when indirect MKey associated with fixed memory size entries. 2. null_mkey field that is used to indicate non-present KLM/KSM entries, where it causes the device to generate page fault event when trying to access it. 3. struct mlx5_ifc_cmd_hca_cap_bits capability bits indicating related value/field is supported: * fixed_buffer_size - MLX5_MKC_ACCESS_MODE_KSM * umr_extended_translation_offset - translation_offset_42_16 in UMR ctrl segment * null_mkey - null_mkey in QUERY_SPECIAL_CONTEXTS Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4792c856531e..7c760e580268 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -782,11 +782,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_eq[0x4]; u8 max_indirection[0x8]; - u8 reserved_at_108[0x1]; + u8 fixed_buffer_size[0x1]; u8 log_max_mrw_sz[0x7]; u8 reserved_at_110[0x2]; u8 log_max_bsf_list_size[0x6]; - u8 reserved_at_118[0x2]; + u8 umr_extended_translation_offset[0x1]; + u8 null_mkey[0x1]; u8 log_max_klm_list_size[0x6]; u8 reserved_at_120[0xa]; @@ -2569,6 +2570,7 @@ enum { MLX5_MKC_ACCESS_MODE_PA = 0x0, MLX5_MKC_ACCESS_MODE_MTT = 0x1, MLX5_MKC_ACCESS_MODE_KLMS = 0x2, + MLX5_MKC_ACCESS_MODE_KSM = 0x3, }; struct mlx5_ifc_mkc_bits { @@ -3677,6 +3679,10 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 dump_fill_mkey[0x20]; u8 resd_lkey[0x20]; + + u8 null_mkey[0x20]; + + u8 reserved_at_a0[0x60]; }; struct mlx5_ifc_query_special_contexts_in_bits { -- cgit v1.2.3 From 3161625589c1d7c54e949d462f4d0c327664881a Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:42 +0200 Subject: IB/mlx5: Refactor UMR post send format * Update struct mlx5_wqe_umr_ctrl_seg. * Currenlty UMR send_flags aim only certain use cases: enabled/disable cached MR, modifying XLT for ODP. By making flags independent make UMR more flexible allowing arbitrary manipulations. * Since different UMR formats have different entry sizes UMR request should receive exact size of translation table update instead of number of entries. Rename field npages to xlt_size in struct mlx5_umr_wr and update relevant code accordingly. * Add support of length64 bit. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 0aacb2a7480d..693811e0cb24 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -292,10 +292,14 @@ struct mlx5_wqe_data_seg { struct mlx5_wqe_umr_ctrl_seg { u8 flags; u8 rsvd0[3]; - __be16 klm_octowords; - __be16 bsf_octowords; + __be16 xlt_octowords; + union { + __be16 xlt_offset; + __be16 bsf_octowords; + }; __be64 mkey_mask; - u8 rsvd1[32]; + __be32 xlt_offset_47_16; + u8 rsvd1[28]; }; struct mlx5_seg_set_psv { @@ -389,6 +393,10 @@ struct mlx5_bsf { struct mlx5_bsf_inl m_inl; }; +struct mlx5_mtt { + __be64 ptag; +}; + struct mlx5_klm { __be32 bcount; __be32 key; -- cgit v1.2.3 From 7d0cc6edcc7011133c45f62a7796a98b8cb5da0f Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:44 +0200 Subject: IB/mlx5: Add MR cache for large UMR regions In this change we turn mlx5_ib_update_mtt() into generic mlx5_ib_update_xlt() to perfrom HCA translation table modifiactions supporting both atomic and process contexts and not limited by number of modified entries. Using this function we increase preallocated MRs up to 16GB. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0ae55361e674..ec52f3b50bf5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -959,7 +959,7 @@ enum { }; enum { - MAX_MR_CACHE_ENTRIES = 16, + MAX_MR_CACHE_ENTRIES = 21, }; enum { -- cgit v1.2.3 From 223cdc72429079aaf72511d2677b5d6584866313 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:45 +0200 Subject: net/mlx5: Update PAGE_FAULT_RESUME layout Update PAGE_FAULT_RESUME command layout. Three bit fields describing page fault: rdma, rdma_write, req_res gave 8 possible combinations, while only a few were legal. Now they are interpreted as three-bit type field, where former legal combinations turns into corresponding types and unused were added as new types. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7c760e580268..608dc988b3d6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4775,12 +4775,11 @@ struct mlx5_ifc_page_fault_resume_in_bits { u8 error[0x1]; u8 reserved_at_41[0x4]; - u8 rdma[0x1]; - u8 read_write[0x1]; - u8 req_res[0x1]; - u8 qpn[0x18]; + u8 page_fault_type[0x3]; + u8 wq_number[0x18]; - u8 reserved_at_60[0x20]; + u8 reserved_at_60[0x8]; + u8 token[0x18]; }; struct mlx5_ifc_nop_out_bits { -- cgit v1.2.3 From d9aaed838765e28234cb700c7d1ac975cadf28c9 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:46 +0200 Subject: {net,IB}/mlx5: Refactor page fault handling * Update page fault event according to last specification. * Separate code path for page fault EQ, completion EQ and async EQ. * Move page fault handling work queue from mlx5_ib static variable into mlx5_core page fault EQ. * Allocate memory to store ODP event dynamically as the events arrive, since in atomic context - use mempool. * Make mlx5_ib page fault handler run in process context. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 6 ++- include/linux/mlx5/driver.h | 97 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/mlx5/qp.h | 44 -------------------- 3 files changed, 96 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 9f489365b3d3..3ccaeff15a80 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault { __be16 wqe_index; u16 reserved2; __be16 packet_length; - u8 reserved3[12]; + __be32 token; + u8 reserved4[8]; + __be32 pftype_wq; } __packed wqe; struct { __be32 r_key; @@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault { __be16 packet_length; __be32 rdma_op_len; __be64 rdma_va; + __be32 pftype_token; } __packed rdma; } __packed; - __be32 flags_qpn; } __packed; struct mlx5_eqe_vport_change { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ec52f3b50bf5..b52d07491fe7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,7 @@ enum { MLX5_EQ_VEC_PAGES = 0, MLX5_EQ_VEC_CMD = 1, MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_PFAULT = 3, MLX5_EQ_VEC_COMP_BASE, }; @@ -178,6 +180,14 @@ enum mlx5_port_status { MLX5_PORT_DOWN = 2, }; +enum mlx5_eq_type { + MLX5_EQ_TYPE_COMP, + MLX5_EQ_TYPE_ASYNC, +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + MLX5_EQ_TYPE_PF, +#endif +}; + struct mlx5_uuar_info { struct mlx5_uar *uars; int num_uars; @@ -333,6 +343,14 @@ struct mlx5_eq_tasklet { spinlock_t lock; }; +struct mlx5_eq_pagefault { + struct work_struct work; + /* Pagefaults lock */ + spinlock_t lock; + struct workqueue_struct *wq; + mempool_t *pool; +}; + struct mlx5_eq { struct mlx5_core_dev *dev; __be32 __iomem *doorbell; @@ -346,7 +364,13 @@ struct mlx5_eq { struct list_head list; int index; struct mlx5_rsc_debug *dbg; - struct mlx5_eq_tasklet tasklet_ctx; + enum mlx5_eq_type type; + union { + struct mlx5_eq_tasklet tasklet_ctx; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq_pagefault pf_ctx; +#endif + }; }; struct mlx5_core_psv { @@ -377,6 +401,8 @@ struct mlx5_core_mkey { u32 pd; }; +#define MLX5_24BIT_MASK ((1 << 24) - 1) + enum mlx5_res_type { MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, @@ -411,6 +437,9 @@ struct mlx5_eq_table { struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + struct mlx5_eq pfault_eq; +#endif int num_comp_vectors; /* protect EQs list */ @@ -497,6 +526,7 @@ struct mlx5_fc_stats { struct mlx5_eswitch; struct mlx5_lag; +struct mlx5_pagefault; struct mlx5_rl_entry { u32 rate; @@ -601,6 +631,14 @@ struct mlx5_priv { struct mlx5_rl_table rl_table; struct mlx5_port_module_event_stats pme_stats; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + void (*pfault)(struct mlx5_core_dev *dev, + void *context, + struct mlx5_pagefault *pfault); + void *pfault_ctx; + struct srcu_struct pfault_srcu; +#endif }; enum mlx5_device_state { @@ -619,6 +657,50 @@ enum mlx5_pci_status { MLX5_PCI_STATUS_ENABLED, }; +enum mlx5_pagefault_type_flags { + MLX5_PFAULT_REQUESTOR = 1 << 0, + MLX5_PFAULT_WRITE = 1 << 1, + MLX5_PFAULT_RDMA = 1 << 2, +}; + +/* Contains the details of a pagefault. */ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_eq *eq; + struct work_struct work; +}; + struct mlx5_td { struct list_head tirs_list; u32 tdn; @@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); -#endif void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, struct mlx5_uar *uar); + int nent, u64 mask, const char *name, + struct mlx5_uar *uar, enum mlx5_eq_type type); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); @@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, + u32 wq_num, u8 type, int error); +#endif int mlx5_init_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); @@ -974,6 +1058,9 @@ struct mlx5_interface { void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); + void (*pfault)(struct mlx5_core_dev *dev, + void *context, + struct mlx5_pagefault *pfault); void * (*get_dev)(void *context); int protocol; struct list_head list; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 693811e0cb24..9ed775f5cb66 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -50,9 +50,6 @@ #define MLX5_BSF_APPTAG_ESCAPE 0x1 #define MLX5_BSF_APPREF_ESCAPE 0x2 -#define MLX5_QPN_BITS 24 -#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1) - enum mlx5_qp_optpar { MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, MLX5_QP_OPTPAR_RRE = 1 << 1, @@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg { __be16 num_entries; }; -enum mlx5_pagefault_flags { - MLX5_PFAULT_REQUESTOR = 1 << 0, - MLX5_PFAULT_WRITE = 1 << 1, - MLX5_PFAULT_RDMA = 1 << 2, -}; - -/* Contains the details of a pagefault. */ -struct mlx5_pagefault { - u32 bytes_committed; - u8 event_subtype; - enum mlx5_pagefault_flags flags; - union { - /* Initiator or send message responder pagefault details. */ - struct { - /* Received packet size, only valid for responders. */ - u32 packet_size; - /* - * WQE index. Refers to either the send queue or - * receive queue, according to event_subtype. - */ - u16 wqe_index; - } wqe; - /* RDMA responder pagefault details */ - struct { - u32 r_key; - /* - * Received packet size, minimal size page fault - * resolution required for forward progress. - */ - u32 packet_size; - u32 rdma_op_len; - u64 rdma_va; - } rdma; - }; -}; - struct mlx5_core_qp { struct mlx5_core_rsc_common common; /* must be first */ void (*event) (struct mlx5_core_qp *, int); - void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *); int qpn; struct mlx5_rsc_debug *dbg; int pid; @@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev); void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, - u8 context, int error); -#endif int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq); void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, -- cgit v1.2.3 From 17d2f88f92ce39b348f125f6b2e6eeb6b0906ac7 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:47 +0200 Subject: IB/mlx5: Add ODP atomics support Handle ODP atomic operations. When initiator of RDMA atomic operation use ODP MR to provide source data handle pagefault properly. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 2 +- include/linux/mlx5/qp.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 608dc988b3d6..15f896781966 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -328,7 +328,7 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits { u8 receive[0x1]; u8 write[0x1]; u8 read[0x1]; - u8 reserved_at_4[0x1]; + u8 atomic[0x1]; u8 srq_receive[0x1]; u8 reserved_at_6[0x1a]; }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 9ed775f5cb66..219c699c17b7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,6 +212,7 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 +#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, @@ -242,6 +243,23 @@ struct mlx5_wqe_masked_atomic_seg { __be64 compare_mask; }; +struct mlx5_base_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + u8 stat_rate_sl; + u8 fl_mlid; + union { + __be16 rlid; + __be16 udp_sport; + }; +}; + struct mlx5_av { union { struct { -- cgit v1.2.3 From aa8e08d2f523501c40b0e70f1c4ecacb97195931 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Mon, 2 Jan 2017 11:37:48 +0200 Subject: IB/mlx5: Improve MR check Add "type" field to mlx5_core MKEY struct. Check whether page fault happens on MKEY corresponding to MR. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b52d07491fe7..cfa49bca009c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -394,11 +394,17 @@ struct mlx5_core_sig_ctx { u32 sigerr_count; }; +enum { + MLX5_MKEY_MR = 1, + MLX5_MKEY_MW, +}; + struct mlx5_core_mkey { u64 iova; u64 size; u32 key; u32 pd; + u32 type; }; #define MLX5_24BIT_MASK ((1 << 24) - 1) -- cgit v1.2.3 From 8e4881aa1d5d2f9c7ebfd0fe5e138f0cc345832c Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 1 Jan 2017 19:02:45 +0100 Subject: net: mdio: add mdio45_ethtool_ksettings_get There is a function in mdio for the old ethtool api gset. We add a new function mdio45_ethtool_ksettings_get for the new ethtool api glinksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- include/linux/mdio.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index bf9d1d750693..b6587a4b32e7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -130,6 +130,10 @@ extern int mdio45_nway_restart(const struct mdio_if_info *mdio); extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, struct ethtool_cmd *ecmd, u32 npage_adv, u32 npage_lpa); +extern void +mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio, + struct ethtool_link_ksettings *cmd, + u32 npage_adv, u32 npage_lpa); /** * mdio45_ethtool_gset - get settings for ETHTOOL_GSET @@ -147,6 +151,23 @@ static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio, mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0); } +/** + * mdio45_ethtool_ksettings_get - get settings for ETHTOOL_GLINKSETTINGS + * @mdio: MDIO interface + * @cmd: Ethtool request structure + * + * Since the CSRs for auto-negotiation using next pages are not fully + * standardised, this function does not attempt to decode them. Use + * mdio45_ethtool_ksettings_get_npage() to specify advertisement bits + * from next pages. + */ +static inline void +mdio45_ethtool_ksettings_get(const struct mdio_if_info *mdio, + struct ethtool_link_ksettings *cmd) +{ + mdio45_ethtool_ksettings_get_npage(mdio, cmd, 0, 0); +} + extern int mdio_mii_ioctl(const struct mdio_if_info *mdio, struct mii_ioctl_data *mii_data, int cmd); -- cgit v1.2.3 From 7b73305160f11b633e9801b2c6b83d5b0cb867cc Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 16 Dec 2016 18:45:21 +0100 Subject: module: Drop redundant declaration of struct module Struct module is already declared at the beginning of the file, no need to declare it again. Signed-off-by: Jean Delvare Fixes: 93c2e105f6bc ("module: Optimize __module_address() using a latched RB-tree") Cc: Peter Zijlstra (Intel) Cc: Jessica Yu Cc: Rusty Russell Signed-off-by: Jessica Yu --- include/linux/module.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7c84273d60b9..ef599379f9a4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -282,8 +282,6 @@ enum module_state { MODULE_STATE_UNFORMED, /* Still setting it up. */ }; -struct module; - struct mod_tree_node { struct module *mod; struct latch_tree_node node; -- cgit v1.2.3 From 8afda8b26d01ee26a60ef2f0284a7f01a5ed96f8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 28 Nov 2016 15:06:24 +0300 Subject: spi-nor: Add support for Intel SPI serial flash controller Add support for the SPI serial flash host controller found on many Intel CPUs including Baytrail and Braswell. The SPI serial flash controller is used to access BIOS and other platform specific information. By default the driver exposes a single read-only MTD device but with a module parameter "writeable=1" the MTD device can be made read-write which makes it possible to upgrade BIOS directly from Linux. Signed-off-by: Mika Westerberg Acked-by: Cyrille Pitchen Signed-off-by: Lee Jones --- include/linux/platform_data/intel-spi.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/platform_data/intel-spi.h (limited to 'include/linux') diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h new file mode 100644 index 000000000000..942b0c3f8f08 --- /dev/null +++ b/include/linux/platform_data/intel-spi.h @@ -0,0 +1,31 @@ +/* + * Intel PCH/PCU SPI flash driver. + * + * Copyright (C) 2016, Intel Corporation + * Author: Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef INTEL_SPI_PDATA_H +#define INTEL_SPI_PDATA_H + +enum intel_spi_type { + INTEL_SPI_BYT = 1, + INTEL_SPI_LPT, + INTEL_SPI_BXT, +}; + +/** + * struct intel_spi_boardinfo - Board specific data for Intel SPI driver + * @type: Type which this controller is compatible with + * @writeable: The chip is writeable + */ +struct intel_spi_boardinfo { + enum intel_spi_type type; + bool writeable; +}; + +#endif /* INTEL_SPI_PDATA_H */ -- cgit v1.2.3 From ff00d7a32a1b88b772981a13fc198e0d29300666 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 28 Nov 2016 15:06:25 +0300 Subject: mfd: lpc_ich: Add support for SPI serial flash host controller Many Intel CPUs including Haswell, Broadwell and Baytrail have SPI serial flash host controller as part of the LPC device. This will populate an MFD cell suitable for the SPI host controller driver if we know that the LPC device has one. Signed-off-by: Mika Westerberg Acked-by: Lee Jones Signed-off-by: Lee Jones --- include/linux/mfd/lpc_ich.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 2b300b44f994..fba8fcb54f8c 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -20,6 +20,8 @@ #ifndef LPC_ICH_H #define LPC_ICH_H +#include + /* GPIO resources */ #define ICH_RES_GPIO 0 #define ICH_RES_GPE0 1 @@ -40,6 +42,7 @@ struct lpc_ich_info { char name[32]; unsigned int iTCO_version; unsigned int gpio_version; + enum intel_spi_type spi_type; u8 use_gpio; }; -- cgit v1.2.3 From b31ef8285b19ec5563274c574fcfe7a5993125ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Dec 2016 09:47:03 -0800 Subject: thermal core: convert ID allocation to IDA The thermal core does not use the ability to look up pointers by ID, so convert it from using an IDR to the more space-efficient IDA. Signed-off-by: Matthew Wilcox Signed-off-by: Zhang Rui --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e275e98bdceb..dab11f97e1c6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -194,7 +194,7 @@ struct thermal_attr { * @governor: pointer to the governor for this thermal zone * @governor_data: private pointer for governor data * @thermal_instances: list of &struct thermal_instance of this thermal zone - * @idr: &struct idr to generate unique id for this zone's cooling + * @ida: &struct ida to generate unique id for this zone's cooling * devices * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) @@ -227,7 +227,7 @@ struct thermal_zone_device { struct thermal_governor *governor; void *governor_data; struct list_head thermal_instances; - struct idr idr; + struct ida ida; struct mutex lock; struct list_head node; struct delayed_work poll_queue; -- cgit v1.2.3 From fe2858c8c6d167df33a839591ebe63ea05a69d06 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 4 Jan 2017 09:39:52 +0100 Subject: pwm: Remove pwm_can_sleep() The last user of this function has been removed, so it is no longer needed. Signed-off-by: Thierry Reding --- include/linux/pwm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2c6c5114c089..e15fd3ce6502 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -451,8 +451,6 @@ struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id); struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, const char *con_id); void devm_pwm_put(struct device *dev, struct pwm_device *pwm); - -bool pwm_can_sleep(struct pwm_device *pwm); #else static inline struct pwm_device *pwm_request(int pwm_id, const char *label) { @@ -566,11 +564,6 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev, static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm) { } - -static inline bool pwm_can_sleep(struct pwm_device *pwm) -{ - return false; -} #endif static inline void pwm_apply_args(struct pwm_device *pwm) -- cgit v1.2.3 From 8c0216f377406c7613b67bd18755889026284192 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 4 Jan 2017 09:40:54 +0100 Subject: pwm: Remove .can_sleep from struct pwm_chip All PWM devices have been marked as "might sleep" since v4.5, there is no longer a need to differentiate on a per-chip basis. Signed-off-by: Thierry Reding --- include/linux/pwm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e15fd3ce6502..eae215ef1b2c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -287,8 +287,6 @@ struct pwm_ops { * @pwms: array of PWM devices allocated by the framework * @of_xlate: request a PWM device given a device tree PWM specifier * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier - * @can_sleep: must be true if the .config(), .enable() or .disable() - * operations may sleep */ struct pwm_chip { struct device *dev; @@ -302,7 +300,6 @@ struct pwm_chip { struct pwm_device * (*of_xlate)(struct pwm_chip *pc, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; - bool can_sleep; }; /** -- cgit v1.2.3 From 1ff8cebf49ed9e9ca2ae44b5c4176aef9c21af9c Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Tue, 3 Jan 2017 20:42:17 +0800 Subject: scm: remove use CMSG{_COMPAT}_ALIGN(sizeof(struct {compat_}cmsghdr)) sizeof(struct cmsghdr) and sizeof(struct compat_cmsghdr) already aligned. remove use CMSG_ALIGN(sizeof(struct cmsghdr)) and CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)) keep code consistent. Signed-off-by: yuan linyu Signed-off-by: David S. Miller --- include/linux/socket.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index b5cc5a6d7011..c06438023d79 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -92,9 +92,9 @@ struct cmsghdr { #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) ) -#define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + CMSG_ALIGN(sizeof(struct cmsghdr)))) -#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len)) -#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len)) +#define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + sizeof(struct cmsghdr))) +#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len)) +#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len)) #define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \ (struct cmsghdr *)(ctl) : \ -- cgit v1.2.3 From 63971c5682bf89e15083733dcd7b4610e789e843 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 2 Jan 2017 13:44:29 +0200 Subject: spi: pxa2xx: fix indentation of the comments in header Just for sake of readability fix the indentation of the comments in pxa2xx_ssp.h header file. Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 2d6f0c39ed68..a0522328d7aa 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -90,9 +90,9 @@ #define SSSR_RFL_MASK (0xf << 12) /* Receive FIFO Level mask */ #define SSCR1_TFT (0x000003c0) /* Transmit FIFO Threshold (mask) */ -#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */ +#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..16] */ #define SSCR1_RFT (0x00003c00) /* Receive FIFO Threshold (mask) */ -#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */ +#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */ #define RX_THRESH_CE4100_DFLT 2 #define TX_THRESH_CE4100_DFLT 2 @@ -106,9 +106,9 @@ #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */ /* QUARK_X1000 SSCR0 bit definition */ -#define QUARK_X1000_SSCR0_DSS (0x1F) /* Data Size Select (mask) */ -#define QUARK_X1000_SSCR0_DataSize(x) ((x) - 1) /* Data Size Select [4..32] */ -#define QUARK_X1000_SSCR0_FRF (0x3 << 5) /* FRame Format (mask) */ +#define QUARK_X1000_SSCR0_DSS (0x1F << 0) /* Data Size Select (mask) */ +#define QUARK_X1000_SSCR0_DataSize(x) ((x) - 1) /* Data Size Select [4..32] */ +#define QUARK_X1000_SSCR0_FRF (0x3 << 5) /* FRame Format (mask) */ #define QUARK_X1000_SSCR0_Motorola (0x0 << 5) /* Motorola's Serial Peripheral Interface (SPI) */ #define RX_THRESH_QUARK_X1000_DFLT 1 @@ -121,8 +121,8 @@ #define QUARK_X1000_SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..32] */ #define QUARK_X1000_SSCR1_RFT (0x1F << 11) /* Receive FIFO Threshold (mask) */ #define QUARK_X1000_SSCR1_RxTresh(x) (((x) - 1) << 11) /* level [1..32] */ -#define QUARK_X1000_SSCR1_STRF (1 << 17) /* Select FIFO or EFWR */ -#define QUARK_X1000_SSCR1_EFWR (1 << 16) /* Enable FIFO Write/Read */ +#define QUARK_X1000_SSCR1_STRF (1 << 17) /* Select FIFO or EFWR */ +#define QUARK_X1000_SSCR1_EFWR (1 << 16) /* Enable FIFO Write/Read */ /* extra bits in PXA255, PXA26x and PXA27x SSP ports */ #define SSCR0_TISSP (1 << 4) /* TI Sync Serial Protocol */ -- cgit v1.2.3 From eac53b3664f592713655f5de59dc44bdd0cfc0bd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2016 15:36:47 +0100 Subject: power: supply: axp288_charger: Drop platform_data dependency When the axp288_charger driver was originally merged, it was merged with a dependency on some other driver providing platform data for it. However the battery-data-framework which should provide that data never got merged, so the axp288_charger as merged upstream has never worked, its probe method simply always returns -ENODEV. This commit removes the dependency on the platform_data instead reading back the charging current and charging voltage that the firmware has set and using those values as the maximum values the user may set. Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/mfd/axp20x.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index a4860bc9b73d..e460fc42d63e 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -554,13 +554,6 @@ struct axp20x_fg_pdata { int thermistor_curve[MAX_THERM_CURVE_SIZE][2]; }; -struct axp20x_chrg_pdata { - int max_cc; - int max_cv; - int def_cc; - int def_cv; -}; - struct axp288_extcon_pdata { /* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */ struct gpio_desc *gpio_mux_cntl; -- cgit v1.2.3 From 888f97435a856c2c5c6ca0b3337b68d595b5639e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Dec 2016 17:38:53 +0100 Subject: power: supply: axp288_fuel_gauge: Drop platform_data dependency When the axp288_faul_gauge driver was originally merged, it was merged with a dependency on some other driver providing platform data for it. However the battery-data-framework which should provide that data never got merged, resulting in x86 tablets / laptops with an axp288 having no working battery monitor, as before this commit the driver would simply return -ENODEV if there is no platform data. This commit removes the dependency on the platform_data instead checking that the firmware has initialized the fuel-gauge and reading the info back from the pmic. What is missing from the read-back info is the table to map raw adc values to temperature, so this commit drops the temperature and temperature limits properties. The min voltage, charge design and model name info is also missing. Note that none of these are really important for userspace to have. All other functionality is preserved and actually made available by this commit. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=88471 Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/mfd/axp20x.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index e460fc42d63e..812806d6319b 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -532,28 +532,6 @@ struct axp20x_dev { const struct regmap_irq_chip *regmap_irq_chip; }; -#define BATTID_LEN 64 -#define OCV_CURVE_SIZE 32 -#define MAX_THERM_CURVE_SIZE 25 -#define PD_DEF_MIN_TEMP 0 -#define PD_DEF_MAX_TEMP 55 - -struct axp20x_fg_pdata { - char battid[BATTID_LEN + 1]; - int design_cap; - int min_volt; - int max_volt; - int max_temp; - int min_temp; - int cap1; - int cap0; - int rdc1; - int rdc0; - int ocv_curve[OCV_CURVE_SIZE]; - int tcsz; - int thermistor_curve[MAX_THERM_CURVE_SIZE][2]; -}; - struct axp288_extcon_pdata { /* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */ struct gpio_desc *gpio_mux_cntl; -- cgit v1.2.3 From 5952758101fb55844957a8d4fe88402d9827cfb4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 4 Jan 2017 19:56:24 +0100 Subject: dsa: mv88e6xxx: Optimise atu_get Lookup in the ATU can be performed starting from a given MAC address. This is faster than starting with the first possible MAC address and iterating all entries. Entries are returned in numeric order. So if the MAC address returned is bigger than what we are searching for, we know it is not in the ATU. Using the benchmark provided by Volodymyr Bendiuga , https://www.spinics.net/lists/netdev/msg411550.html on an Marvell Armada 370 RD, the test to add a number of static fdb entries went from 1.616531 seconds to 0.312052 seconds. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 6fec9e81bd70..42add77ae47d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -396,6 +396,66 @@ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, return true; } +/** + * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return a u64 value of the address + */ +static inline u64 ether_addr_to_u64(const u8 *addr) +{ + u64 u = 0; + int i; + + for (i = 0; i < ETH_ALEN; i++) + u = u << 8 | addr[i]; + + return u; +} + +/** + * u64_to_ether_addr - Convert a u64 to an Ethernet address. + * @u: u64 to convert to an Ethernet MAC address + * @addr: Pointer to a six-byte array to contain the Ethernet address + */ +static inline void u64_to_ether_addr(u64 u, u8 *addr) +{ + int i; + + for (i = ETH_ALEN - 1; i >= 0; i--) { + addr[i] = u & 0xff; + u = u >> 8; + } +} + +/** + * eth_addr_dec - Decrement the given MAC address + * + * @addr: Pointer to a six-byte array containing Ethernet address to decrement + */ +static inline void eth_addr_dec(u8 *addr) +{ + u64 u = ether_addr_to_u64(addr); + + u--; + u64_to_ether_addr(u, addr); +} + +/** + * ether_addr_greater - Compare two Ethernet addresses + * @addr1: Pointer to a six-byte array containing the Ethernet address + * @addr2: Pointer other six-byte array containing the Ethernet address + * + * Compare two Ethernet addresses, returns true addr1 is greater than addr2 + */ +static inline bool ether_addr_greater(const u8 *addr1, const u8 *addr2) +{ + u64 u1 = ether_addr_to_u64(addr1); + u64 u2 = ether_addr_to_u64(addr2); + + return u1 > u2; +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure -- cgit v1.2.3 From 3db5e3e707ebb9ab0ce3a2dcd924ed2ea525d770 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jan 2017 13:37:28 +0100 Subject: wireless: move IEEE80211_NUM_ACS to ieee80211.h This constant isn't really specific to mac80211, so move it "up" a level to ieee80211.h Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe849329511a..87d1937e4671 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -185,6 +185,8 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) /* number of user priorities 802.11 uses */ #define IEEE80211_NUM_UPS 8 +/* number of ACs */ +#define IEEE80211_NUM_ACS 4 #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ -- cgit v1.2.3 From 0e377f3b9ae936aefe5aaca4c2e2546d57b63df7 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Thu, 5 Jan 2017 18:24:04 +0800 Subject: iio: Add gravity sensor support Gravity sensor is a soft sensor, which derives value from standard accelerometer device by filtering out the acceleration which is not caused by gravity. Gravity sensor provides a three dimensional vector indicating the direction and magnitude of gravity. Typically, this sensor is used to determine the device's relative orientation in space. The units and the coordinate system is the same as the one used by the acceleration sensor. When a device is at rest, the output of the gravity sensor should be identical to that of the accelerometer. More information can be found in: http://www.usb.org/developers/hidpage/HUTRR59_-_Usages_for_Wearables.pdf Gravity sensor and accelerometer have similar channels and share channel usage ids. So the most of the code for accel_3d can be reused. Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 9a3a9db1b39d..30c7dc45e45f 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -52,6 +52,9 @@ #define HID_USAGE_SENSOR_ANGL_VELOCITY_Y_AXIS 0x200458 #define HID_USAGE_SENSOR_ANGL_VELOCITY_Z_AXIS 0x200459 +/* Gravity vector */ +#define HID_USAGE_SENSOR_GRAVITY_VECTOR 0x20007B + /* ORIENTATION: Compass 3D: (200083) */ #define HID_USAGE_SENSOR_COMPASS_3D 0x200083 #define HID_USAGE_SENSOR_DATA_ORIENTATION 0x200470 -- cgit v1.2.3 From 210af2a5f12403a8968d6014742886cc7e9823b4 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 5 Jan 2017 10:52:10 -0600 Subject: ipmi: make ipmi_usr_hndl const It's only function pointers. Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 78c5d5ae3857..f1045b2c6a00 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -100,7 +100,7 @@ struct ipmi_user_hndl { /* Create a new user of the IPMI layer on the given interface number. */ int ipmi_create_user(unsigned int if_num, - struct ipmi_user_hndl *handler, + const struct ipmi_user_hndl *handler, void *handler_data, ipmi_user_t *user); -- cgit v1.2.3 From 4e552c8cb5bc9137e67e035bab8df6dddbca7384 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 5 Jan 2017 11:34:12 +0900 Subject: leds: add LED_ON brightness as boolean value Some devices do not handle the led brightness or simply don't care about it. Conceptually said devices want to just switch on or off the led. It is useless in this case to have a 255 range of brightness, while just having an LED_ON and LED_OFF improves the boolean meaning of the led status. Signed-off-by: Andi Shyti Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 569cb531094c..bb50d0151e75 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -27,6 +27,7 @@ struct device; enum led_brightness { LED_OFF = 0, + LED_ON = 1, LED_HALF = 127, LED_FULL = 255, }; -- cgit v1.2.3 From eca90a3b3226fcecb4b3bbf4b0b6ff72422674bc Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Thu, 5 Jan 2017 22:11:50 +0100 Subject: EDAC: Fix typos in enum mem_type comments s/labed/labeled/ s/differenciate/differentiate/ Signed-off-by: Alexander Alemayhu Cc: linux-edac Link: http://lkml.kernel.org/r/20170105211150.24003-1-alexander@alemayhu.com Signed-off-by: Borislav Petkov --- include/linux/edac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 07c52c0af62d..5b6adf964248 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -190,8 +190,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * part of the memory details to the memory controller. * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers. * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F. - * Those memories are labed as "PC2-" instead of "PC" to - * differenciate from DDR. + * Those memories are labeled as "PC2-" instead of "PC" to + * differentiate from DDR. * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205 * and JESD206. * Those memories are accessed per DIMM slot, and not by -- cgit v1.2.3 From 69d707d034b6078f0b5998f80e5883c8243b205c Mon Sep 17 00:00:00 2001 From: "Karicheri, Muralidharan" Date: Fri, 6 Jan 2017 15:37:39 -0500 Subject: net: netcp: extract eflag from desc for rx_hook handling Extract the eflag bits from the received desc and pass it down the rx_hook chain to be available for netcp modules. Also the psdata and epib data has to be inspected by the netcp modules. So the desc can be freed only after returning from the rx_hook. So move knav_pool_desc_put() after the rx_hook processing. Signed-off-by: Murali Karicheri Signed-off-by: David S. Miller --- include/linux/soc/ti/knav_dma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h index 35cb9264e0d5..2b7882666ef6 100644 --- a/include/linux/soc/ti/knav_dma.h +++ b/include/linux/soc/ti/knav_dma.h @@ -41,6 +41,8 @@ #define KNAV_DMA_DESC_RETQ_SHIFT 0 #define KNAV_DMA_DESC_RETQ_MASK MASK(14) #define KNAV_DMA_DESC_BUF_LEN_MASK MASK(22) +#define KNAV_DMA_DESC_EFLAGS_MASK MASK(4) +#define KNAV_DMA_DESC_EFLAGS_SHIFT 20 #define KNAV_DMA_NUM_EPIB_WORDS 4 #define KNAV_DMA_NUM_PS_WORDS 16 -- cgit v1.2.3 From f099d616dd689d27b08dec95d0b6f1f302cf8ec4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Jan 2017 12:01:30 -0800 Subject: fscrypt: remove unused 'mode' member of fscrypt_ctx Nothing reads or writes fscrypt_ctx.mode, and it doesn't belong there because a fscrypt_ctx is not tied to a specific encryption mode. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index 2a2815702095..8635ea46ef6e 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -35,7 +35,6 @@ struct fscrypt_ctx { struct list_head free_list; /* Free list */ }; u8 flags; /* Flags */ - u8 mode; /* Encryption mode for tfm */ }; /** -- cgit v1.2.3 From a5d431eff2e0bb22156897435aa277ddc96074f7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Jan 2017 13:51:18 -0800 Subject: fscrypt: make fscrypt_operations.key_prefix a string There was an unnecessary amount of complexity around requesting the filesystem-specific key prefix. It was unclear why; perhaps it was envisioned that different instances of the same filesystem type could use different key prefixes, or that key prefixes could be binary. However, neither of those things were implemented or really make sense at all. So simplify the code by making key_prefix a const char *. Signed-off-by: Eric Biggers Reviewed-by: Richard Weinberger Signed-off-by: Theodore Ts'o --- include/linux/fscrypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index 8635ea46ef6e..715f17b3c6d7 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -85,8 +85,8 @@ struct fscrypt_name { */ struct fscrypt_operations { unsigned int flags; + const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); - int (*key_prefix)(struct inode *, u8 **); int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); -- cgit v1.2.3 From 2f5ff26478adaff5ed9b7ad4079d6a710b5f27e7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:21 +0200 Subject: mlx5: Fix naming convention with respect to UARs This establishes a solid naming conventions for UARs. A UAR (User Access Region) can have size identical to a system page or can be fixed 4KB depending on a value queried by firmware. Each UAR always has 4 blue flame register which are used to post doorbell to send queue. In addition, a UAR has section used for posting doorbells to CQs or EQs. In this patch we change names to reflect this conventions. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 9 +++++---- include/linux/mlx5/driver.h | 14 +++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3ccaeff15a80..aa851c51ab59 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -212,10 +212,11 @@ enum { }; enum { - MLX5_BF_REGS_PER_PAGE = 4, - MLX5_MAX_UAR_PAGES = 1 << 8, - MLX5_NON_FP_BF_REGS_PER_PAGE = 2, - MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE, + MLX5_BFREGS_PER_UAR = 4, + MLX5_MAX_UARS = 1 << 8, + MLX5_NON_FP_BFREGS_PER_UAR = 2, + MLX5_MAX_BFREGS = MLX5_MAX_UARS * + MLX5_NON_FP_BFREGS_PER_UAR, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index cfa49bca009c..3d07e25b3bf1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -188,16 +188,16 @@ enum mlx5_eq_type { #endif }; -struct mlx5_uuar_info { +struct mlx5_bfreg_info { struct mlx5_uar *uars; int num_uars; - int num_low_latency_uuars; + int num_low_latency_bfregs; unsigned long *bitmap; unsigned int *count; struct mlx5_bf *bfs; /* - * protect uuar allocation data structs + * protect bfreg allocation data structs */ struct mutex lock; u32 ver; @@ -217,7 +217,7 @@ struct mlx5_bf { /* serialize 64 bit writes when done as two 32 bit accesses */ spinlock_t lock32; - int uuarn; + int bfregn; }; struct mlx5_cmd_first { @@ -579,7 +579,7 @@ struct mlx5_priv { struct mlx5_eq_table eq_table; struct msix_entry *msix_arr; struct mlx5_irq_info *irq_info; - struct mlx5_uuar_info uuari; + struct mlx5_bfreg_info bfregi; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); /* pages stuff */ @@ -903,8 +903,8 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); -int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); -int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_alloc_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi); +int mlx5_free_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi); int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, bool map_wc); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); -- cgit v1.2.3 From a6d51b68611e98f05042ada662aed5dbe3279c1e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:23 +0200 Subject: net/mlx5: Introduce blue flame register allocator Here is an implementation of an allocator that allocates blue flame registers. A blue flame register is used for generating send doorbells. A blue flame register can be used to generate either a regular doorbell or a blue flame doorbell where the data to be sent is written to the device's I/O memory hence saving the need to read the data from memory. For blue flame kind of doorbells to succeed, the blue flame register need to be mapped as write combining. The user can specify what kind of send doorbells she wishes to use. If she requested write combining mapping but that failed, the allocator will fall back to non write combining mapping and will indicate that to the user. Subsequent patches in this series will make use of this allocator. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 2 ++ include/linux/mlx5/driver.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 7 +++++-- 3 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index aa851c51ab59..db1b9287012f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -215,6 +215,8 @@ enum { MLX5_BFREGS_PER_UAR = 4, MLX5_MAX_UARS = 1 << 8, MLX5_NON_FP_BFREGS_PER_UAR = 2, + MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR - + MLX5_NON_FP_BFREGS_PER_UAR, MLX5_MAX_BFREGS = MLX5_MAX_UARS * MLX5_NON_FP_BFREGS_PER_UAR, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3d07e25b3bf1..969aa1fe17e2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -452,6 +452,39 @@ struct mlx5_eq_table { spinlock_t lock; }; +struct mlx5_uars_page { + void __iomem *map; + bool wc; + u32 index; + struct list_head list; + unsigned int bfregs; + unsigned long *reg_bitmap; /* for non fast path bf regs */ + unsigned long *fp_bitmap; + unsigned int reg_avail; + unsigned int fp_avail; + struct kref ref_count; + struct mlx5_core_dev *mdev; +}; + +struct mlx5_bfreg_head { + /* protect blue flame registers allocations */ + struct mutex lock; + struct list_head list; +}; + +struct mlx5_bfreg_data { + struct mlx5_bfreg_head reg_head; + struct mlx5_bfreg_head wc_head; +}; + +struct mlx5_sq_bfreg { + void __iomem *map; + struct mlx5_uars_page *up; + bool wc; + u32 index; + unsigned int offset; +}; + struct mlx5_uar { u32 index; struct list_head bf_list; @@ -645,6 +678,7 @@ struct mlx5_priv { void *pfault_ctx; struct srcu_struct pfault_srcu; #endif + struct mlx5_bfreg_data bfregs; }; enum mlx5_device_state { @@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path); +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); static inline int fw_initializing(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 15f896781966..1223feff0ea4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 uc[0x1]; u8 rc[0x1]; - u8 reserved_at_240[0xa]; + u8 uar_4k[0x1]; + u8 reserved_at_241[0x9]; u8 uar_sz[0x6]; u8 reserved_at_250[0x8]; u8 log_pg_sz[0x8]; @@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 device_frequency_mhz[0x20]; u8 device_frequency_khz[0x20]; - u8 reserved_at_500[0x80]; + u8 reserved_at_500[0x20]; + u8 num_of_uars_per_page[0x20]; + u8 reserved_at_540[0x40]; u8 reserved_at_580[0x3f]; u8 cqe_compression[0x1]; -- cgit v1.2.3 From 9f09eaeae28a0c67b8fc2b5acb411a5d4fd8cc80 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 6 Jan 2017 17:39:06 -0800 Subject: net: ipmr: Remove nowait arg to ipmr_get_route ipmr_get_route has 1 caller and the nowait arg is 0. Remove the arg and simplify ipmr_get_route accordingly. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index e5fb81376e92..f019b62f27b5 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -120,5 +120,5 @@ struct mfc_cache { struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait, u32 portid); + struct rtmsg *rtm, u32 portid); #endif -- cgit v1.2.3 From bc1f44709cf27fb2a5766cadafe7e2ad5e9cb221 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 6 Jan 2017 19:12:52 -0800 Subject: net: make ndo_get_stats64 a void function The network device operation for reading statistics is only called in one place, and it ignores the return value. Having a structure return value is potentially confusing because some future driver could incorrectly assume that the return value was used. Fix all drivers with ndo_get_stats64 to have a void function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecd78b3c9aba..b14ad9c139d7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -913,8 +913,8 @@ struct netdev_xdp { * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * - * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, - * struct rtnl_link_stats64 *storage); + * void (*ndo_get_stats64)(struct net_device *dev, + * struct rtnl_link_stats64 *storage); * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. Drivers must do one of the following: @@ -1165,8 +1165,8 @@ struct net_device_ops { struct neigh_parms *); void (*ndo_tx_timeout) (struct net_device *dev); - struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, - struct rtnl_link_stats64 *storage); + void (*ndo_get_stats64)(struct net_device *dev, + struct rtnl_link_stats64 *storage); bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, -- cgit v1.2.3 From 8a522bf2d4f788306443d36b26b54f0aedcdfdbe Mon Sep 17 00:00:00 2001 From: Peter Foley Date: Sun, 27 Nov 2016 21:37:20 -0500 Subject: extcon: adc-jack: Fix incompatible pointer type warning This patch fixes the incompatible warning of extcon-adc-jack.c driver when calling devm_extcon_dev_allocate(). Signed-off-by: Peter Foley [cw00.choi: Modify the patch title and descritpion] Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-adc-jack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h index a0e03b13b449..2aa32075bca1 100644 --- a/include/linux/extcon/extcon-adc-jack.h +++ b/include/linux/extcon/extcon-adc-jack.h @@ -59,7 +59,7 @@ struct adc_jack_pdata { const char *name; const char *consumer_channel; - const enum extcon *cable_names; + const unsigned int *cable_names; /* The last entry's state should be 0 */ struct adc_jack_cond *adc_conditions; -- cgit v1.2.3 From db6228612ce297949621a62e9d2331ee55016778 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 21 Dec 2016 14:10:47 +0800 Subject: extcon: Add documentation for EXTCON_CHG_USB_* and EXTCON_USB_* Current there is both "EXTCON_USB" and "EXTCON_CHG_USB_SDP" which both seem to suggest a standard downstream port. But there is no documentation describing how these relate. Thus add documentation to describe EXTCON_CHG_USB_SDP should always appear together with EXTCON_USB, and EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST. Signed-off-by: Baolin Wang Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index b871c0cb1f02..00201230bea5 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -46,7 +46,14 @@ #define EXTCON_USB 1 #define EXTCON_USB_HOST 2 -/* Charging external connector */ +/* + * Charging external connector + * + * When one SDP charger connector was reported, we should also report + * the USB connector, which means EXTCON_CHG_USB_SDP should always + * appear together with EXTCON_USB. The same as ACA charger connector, + * EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST. + */ #define EXTCON_CHG_USB_SDP 5 /* Standard Downstream Port */ #define EXTCON_CHG_USB_DCP 6 /* Dedicated Charging Port */ #define EXTCON_CHG_USB_CDP 7 /* Charging Downstream Port */ -- cgit v1.2.3 From e6cf046543763878614d51e8283abd40d5e5327e Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 26 Dec 2016 20:37:38 +0900 Subject: extcon: Move defintion of struct extcon_dev to driver/extcon directory This patch moves the 'struct extcon_dev' of extcon subsystem to driver/extcon/extcon.h header file because the struct extcon_dev have to be handled by extcon API to guarantee the consistency of strcut extcon_dev. If external drivers are able to touch the struct extcon_dev directly, it might cause the critical and unknown problem. Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 57 +------------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 00201230bea5..d57e52443841 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -167,62 +167,7 @@ union extcon_property_value { }; struct extcon_cable; - -/** - * struct extcon_dev - An extcon device represents one external connector. - * @name: The name of this extcon device. Parent device name is - * used if NULL. - * @supported_cable: Array of supported cable names ending with EXTCON_NONE. - * If supported_cable is NULL, cable name related APIs - * are disabled. - * @mutually_exclusive: Array of mutually exclusive set of cables that cannot - * be attached simultaneously. The array should be - * ending with NULL or be NULL (no mutually exclusive - * cables). For example, if it is { 0x7, 0x30, 0}, then, - * {0, 1}, {0, 1, 2}, {0, 2}, {1, 2}, or {4, 5} cannot - * be attached simulataneously. {0x7, 0} is equivalent to - * {0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there - * can be no simultaneous connections. - * @dev: Device of this extcon. - * @state: Attach/detach state of this extcon. Do not provide at - * register-time. - * @nh: Notifier for the state change events from this extcon - * @entry: To support list of extcon devices so that users can - * search for extcon devices based on the extcon name. - * @lock: - * @max_supported: Internal value to store the number of cables. - * @extcon_dev_type: Device_type struct to provide attribute_groups - * customized for each extcon device. - * @cables: Sysfs subdirectories. Each represents one cable. - * - * In most cases, users only need to provide "User initializing data" of - * this struct when registering an extcon. In some exceptional cases, - * optional callbacks may be needed. However, the values in "internal data" - * are overwritten by register function. - */ -struct extcon_dev { - /* Optional user initializing data */ - const char *name; - const unsigned int *supported_cable; - const u32 *mutually_exclusive; - - /* Internal data. Please do not set. */ - struct device dev; - struct raw_notifier_head *nh; - struct list_head entry; - int max_supported; - spinlock_t lock; /* could be called by irq handler */ - u32 state; - - /* /sys/class/extcon/.../cable.n/... */ - struct device_type extcon_dev_type; - struct extcon_cable *cables; - - /* /sys/class/extcon/.../mutually_exclusive/... */ - struct attribute_group attr_g_muex; - struct attribute **attrs_muex; - struct device_attribute *d_attrs_muex; -}; +struct extcon_dev; #if IS_ENABLED(CONFIG_EXTCON) -- cgit v1.2.3 From 62a37443e93bbae74410cb72aa9d7e15a1da0b98 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 3 Jan 2017 13:50:54 +0800 Subject: extcon: Add documentation for EXTCON_CHG_USB_SLOW/FAST Currently there are no documentation for EXTCON_CHG_USB_SLOW/FAST charger connector. These names don't mean much and no guide to tell users how to use it, thus try to add documentation to make them clear. Suggested-by: NeilBrown Signed-off-by: Baolin Wang [cw00.choi: Use the 'connector' expression instead of 'cable'] Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index d57e52443841..242157cad25d 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -53,6 +53,10 @@ * the USB connector, which means EXTCON_CHG_USB_SDP should always * appear together with EXTCON_USB. The same as ACA charger connector, * EXTCON_CHG_USB_ACA would normally appear with EXTCON_USB_HOST. + * + * The EXTCON_CHG_USB_SLOW connector can provide at least 500mA of + * current at 5V. The EXTCON_CHG_USB_FAST connector can provide at + * least 1A of current at 5V. */ #define EXTCON_CHG_USB_SDP 5 /* Standard Downstream Port */ #define EXTCON_CHG_USB_DCP 6 /* Dedicated Charging Port */ -- cgit v1.2.3 From 3c5f0e076833c407cca372c663d47499ae4dab45 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 2 Jan 2017 13:03:03 +0900 Subject: extcon: Add new EXTCON_CHG_USB_PD type for USB Power Delivery This patch adds the new EXTCON_CHG_USB_PD for USB PD (Power Delivery)[1]. The USB Power Delivery specification specifies that USB cable provides the increased power more than 7.5W to device with larger power demand. The EXTCON_CHG_USB_PD has the EXTCON_TYPE_CHG and EXTCON_TYPE_USB type. [1] https://en.wikipedia.org/wiki/USB#PD Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 242157cad25d..7010fb01a81a 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -65,6 +65,7 @@ #define EXTCON_CHG_USB_FAST 9 #define EXTCON_CHG_USB_SLOW 10 #define EXTCON_CHG_WPT 11 /* Wireless Power Transfer */ +#define EXTCON_CHG_USB_PD 12 /* USB Power Delivery */ /* Jack external connector */ #define EXTCON_JACK_MICROPHONE 20 -- cgit v1.2.3 From e7246e122aaa99ebbb8ad7da80f35a20577bd8af Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:35 -0500 Subject: net-tc: extract skip classify bit from tc_verd Packets sent by the IFB device skip subsequent tc classification. A single bit governs this state. Move it out of tc_verd in anticipation of removing that __u16 completely. The new bitfield tc_skip_classify temporarily uses one bit of a hole, until tc_verd is removed completely in a follow-up patch. Remove the bit hole comment. It could be 2, 3, 4 or 5 bits long. With that many options, little value in documenting it. Introduce a helper function to deduplicate the logic in the two sites that check this bit. The field tc_skip_classify is set only in IFB on skbs cloned in act_mirred, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..570f60ec6cb4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -589,6 +589,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs + * @tc_skip_classify: do not classify packet. set by IFB device * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -749,7 +750,9 @@ struct sk_buff { #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; #endif - /* 2, 4 or 5 bit hole */ +#ifdef CONFIG_NET_CLS_ACT + __u8 tc_skip_classify:1; +#endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -- cgit v1.2.3 From a5135bcfba7345031df45e02cd150a45add47cf8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:36 -0500 Subject: net-tc: convert tc_verd to integer bitfields Extract the remaining two fields from tc_verd and remove the __u16 completely. TC_AT and TC_FROM are converted to equivalent two-bit integer fields tc_at and tc_from. Where possible, use existing helper skb_at_tc_ingress when reading tc_at. Introduce helper skb_reset_tc to clear fields. Not documenting tc_from and tc_at, because they will be replaced with single bit fields in follow-on patches. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 570f60ec6cb4..f738d09947b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -599,7 +599,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index - * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @xmit_more: More SKBs are pending for this queue @@ -752,13 +751,12 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; + __u8 tc_at:2; + __u8 tc_from:2; #endif #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u16 tc_verd; /* traffic control verdict */ -#endif #endif union { -- cgit v1.2.3 From 8dc07fdbf2054f157e8333f940a1ad728916c786 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:37 -0500 Subject: net-tc: convert tc_at to tc_at_ingress Field tc_at is used only within tc actions to distinguish ingress from egress processing. A single bit is sufficient for this purpose. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f738d09947b2..fab3f87e9bd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -590,6 +590,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. set by IFB device + * @tc_at_ingress: used within tc_classify to distinguish in/egress * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -751,7 +752,7 @@ struct sk_buff { #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; - __u8 tc_at:2; + __u8 tc_at_ingress:1; __u8 tc_from:2; #endif -- cgit v1.2.3 From bc31c905e946b5c55df5d2938335e78ffb3157ca Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 7 Jan 2017 17:06:38 -0500 Subject: net-tc: convert tc_from to tc_from_ingress and tc_redirected The tc_from field fulfills two roles. It encodes whether a packet was redirected by an act_mirred device and, if so, whether act_mirred was called on ingress or egress. Split it into separate fields. The information is needed by the special IFB loop, where packets are taken out of the normal path by act_mirred, forwarded to IFB, then reinjected at their original location (ingress or egress) by IFB. The IFB device cannot use skb->tc_at_ingress, because that may have been overwritten as the packet travels from act_mirred to ifb_xmit, when it passes through tc_classify on the IFB egress path. Cache this value in skb->tc_from_ingress. That field is valid only if a packet arriving at ifb_xmit came from act_mirred. Other packets can be crafted to reach ifb_xmit. These must be dropped. Set tc_redirected on redirection and drop all packets that do not have this bit set. Both fields are set only on cloned skbs in tc actions, so original packet sources do not have to clear the bit when reusing packets (notably, pktgen and octeon). Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fab3f87e9bd1..3149a88de548 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -591,6 +591,8 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @ipvs_property: skbuff is owned by ipvs * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress + * @tc_redirected: packet was redirected by a tc action + * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -753,7 +755,8 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_from:2; + __u8 tc_redirected:1; + __u8 tc_from_ingress:1; #endif #ifdef CONFIG_NET_SCHED -- cgit v1.2.3 From d6c99f4bf093a58d3ab47caaec74b81f18bc4e3f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jan 2017 14:12:21 +0000 Subject: dma-fence: Wrap querying the fence->status The fence->status is an optional field that is only valid once the fence has been signaled. (Driver may fill the fence->status with an error code prior to calling dma_fence_signal().) Given the restriction upon its validity, wrap querying of the fence->status into a helper dma_fence_get_status(). Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-2-chris@chris-wilson.co.uk --- include/linux/dma-fence.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index d51a7d23c358..19f7af905182 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -378,6 +378,30 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1, return dma_fence_is_signaled(f2) ? NULL : f2; } +/** + * dma_fence_get_status_locked - returns the status upon completion + * @fence: [in] the dma_fence to query + * + * Drivers can supply an optional error status condition before they signal + * the fence (to indicate whether the fence was completed due to an error + * rather than success). The value of the status condition is only valid + * if the fence has been signaled, dma_fence_get_status_locked() first checks + * the signal state before reporting the error status. + * + * Returns 0 if the fence has not yet been signaled, 1 if the fence has + * been signaled without an error condition, or a negative error code + * if the fence has been completed in err. + */ +static inline int dma_fence_get_status_locked(struct dma_fence *fence) +{ + if (dma_fence_is_signaled_locked(fence)) + return fence->status < 0 ? fence->status : 1; + else + return 0; +} + +int dma_fence_get_status(struct dma_fence *fence); + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From b21507e272627c434e8dd74e8d51fd8245281b59 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 9 Jan 2017 10:07:31 -0500 Subject: proc,security: move restriction on writing /proc/pid/attr nodes to proc Processes can only alter their own security attributes via /proc/pid/attr nodes. This is presently enforced by each individual security module and is also imposed by the Linux credentials implementation, which only allows a task to alter its own credentials. Move the check enforcing this restriction from the individual security modules to proc_pid_attr_write() before calling the security hook, and drop the unnecessary task argument to the security hook since it can only ever be the current task. Signed-off-by: Stephen Smalley Acked-by: Casey Schaufler Acked-by: John Johansen Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 3 +-- include/linux/security.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 558adfa5c8a8..0dde95900196 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1547,8 +1547,7 @@ union security_list_options { void (*d_instantiate)(struct dentry *dentry, struct inode *inode); int (*getprocattr)(struct task_struct *p, char *name, char **value); - int (*setprocattr)(struct task_struct *p, char *name, void *value, - size_t size); + int (*setprocattr)(const char *name, void *value, size_t size); int (*ismaclabel)(const char *name); int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); diff --git a/include/linux/security.h b/include/linux/security.h index c2125e9093e8..f4ebac117fa6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -361,7 +361,7 @@ int security_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); int security_getprocattr(struct task_struct *p, char *name, char **value); -int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size); +int security_setprocattr(const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); @@ -1106,7 +1106,7 @@ static inline int security_getprocattr(struct task_struct *p, char *name, char * return -EINVAL; } -static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size) +static inline int security_setprocattr(char *name, void *value, size_t size) { return -EINVAL; } -- cgit v1.2.3 From a009e975da5c7d42a7f5eaadc54946eb5f76c9af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Jan 2017 14:12:22 +0000 Subject: dma-fence: Introduce drm_fence_set_error() helper The dma_fence.error field (formerly known as dma_fence.status) is an optional field that may be set by drivers before calling dma_fence_signal(). The field can be used to indicate that the fence was completed in err rather than with success, and is visible to other consumers of the fence and to userspace via sync_file. This patch renames the field from status to error so that its meaning is hopefully more clear (and distinct from dma_fence_get_status() which is a composite between the error state and signal state) and adds a helper that validates the preconditions of when it is suitable to adjust the error field. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20170104141222.6992-3-chris@chris-wilson.co.uk --- include/linux/dma-fence.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 19f7af905182..6048fa404e57 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -47,7 +47,7 @@ struct dma_fence_cb; * can be compared to decide which fence would be signaled later. * @flags: A mask of DMA_FENCE_FLAG_* defined below * @timestamp: Timestamp when the fence was signaled. - * @status: Optional, only valid if < 0, must be set before calling + * @error: Optional, only valid if < 0, must be set before calling * dma_fence_signal, indicates that the fence has completed with an error. * * the flags member must be manipulated and read using the appropriate @@ -79,7 +79,7 @@ struct dma_fence { unsigned seqno; unsigned long flags; ktime_t timestamp; - int status; + int error; }; enum dma_fence_flag_bits { @@ -133,7 +133,7 @@ struct dma_fence_cb { * or some failure occurred that made it impossible to enable * signaling. True indicates successful enabling. * - * fence->status may be set in enable_signaling, but only when false is + * fence->error may be set in enable_signaling, but only when false is * returned. * * Calling dma_fence_signal before enable_signaling is called allows @@ -145,7 +145,7 @@ struct dma_fence_cb { * the second time will be a noop since it was already signaled. * * Notes on signaled: - * May set fence->status if returning true. + * May set fence->error if returning true. * * Notes on wait: * Must not be NULL, set to dma_fence_default_wait for default implementation. @@ -395,13 +395,33 @@ static inline struct dma_fence *dma_fence_later(struct dma_fence *f1, static inline int dma_fence_get_status_locked(struct dma_fence *fence) { if (dma_fence_is_signaled_locked(fence)) - return fence->status < 0 ? fence->status : 1; + return fence->error ?: 1; else return 0; } int dma_fence_get_status(struct dma_fence *fence); +/** + * dma_fence_set_error - flag an error condition on the fence + * @fence: [in] the dma_fence + * @error: [in] the error to store + * + * Drivers can supply an optional error status condition before they signal + * the fence, to indicate that the fence was completed due to an error + * rather than success. This must be set before signaling (so that the value + * is visible before any waiters on the signal callback are woken). This + * helper exists to help catching erroneous setting of #dma_fence.error. + */ +static inline void dma_fence_set_error(struct dma_fence *fence, + int error) +{ + BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + BUG_ON(error >= 0 || error < -MAX_ERRNO); + + fence->error = error; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From f32815d21d4d8287336fb9cef4d2d9e0866214c2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 2 Jan 2017 17:19:40 -0500 Subject: xtables: add xt_match, xt_target and data copy_to_user functions xt_entry_target, xt_entry_match and their private data may contain kernel data. Introduce helper functions xt_match_to_user, xt_target_to_user and xt_data_to_user that copy only the expected fields. These replace existing logic that calls copy_to_user on entire structs, then overwrites select fields. Private data is defined in xt_match and xt_target. All matches and targets that maintain kernel data store this at the tail of their private structure. Extend xt_match and xt_target with .usersize to limit how many bytes of data are copied. The remainder is cleared. If compatsize is specified, usersize can only safely be used if all fields up to usersize use platform-independent types. Otherwise, the compat_to_user callback must be defined. This patch does not yet enable the support logic. Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5117e4d2ddfa..be378cf47fcc 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -167,6 +167,7 @@ struct xt_match { const char *table; unsigned int matchsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -207,6 +208,7 @@ struct xt_target { const char *table; unsigned int targetsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -287,6 +289,13 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u); +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u); +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size); + void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); -- cgit v1.2.3 From 0118717583cda6f4f36092853ad0345e8150b286 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:24 +0200 Subject: net/mlx5: Add interface to get reference to a UAR A reference to a UAR is required to generate CQ or EQ doorbells. Since CQ or EQ doorbells can all be generated using the same UAR area without any effect on performance, we are just getting a reference to any available UAR, If one is not available we allocate it but we don't waste the blue flame registers it can provide and we will use them for subsequent allocations. We get a reference to such UAR and put in mlx5_priv so any kernel consumer can make use of it. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 969aa1fe17e2..9a3a0954855b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -679,6 +679,7 @@ struct mlx5_priv { struct srcu_struct pfault_srcu; #endif struct mlx5_bfreg_data bfregs; + struct mlx5_uars_page *uar; }; enum mlx5_device_state { @@ -1007,7 +1008,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, - struct mlx5_uar *uar, enum mlx5_eq_type type); + enum mlx5_eq_type type); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); @@ -1118,6 +1119,8 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From 5fe9dec0d045437e48f112b8fa705197bd7bc3c0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:25 +0200 Subject: IB/mlx5: Use blue flame register allocator in mlx5_ib Make use of the blue flame registers allocator at mlx5_ib. Since blue flame was not really supported we remove all the code that is related to blue flame and we let all consumers to use the same blue flame register. Once blue flame is supported we will add the code. As part of this patch we also move the definition of struct mlx5_bf to mlx5_ib.h as it is only used by mlx5_ib. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/cq.h | 3 +-- include/linux/mlx5/doorbell.h | 6 ++++-- include/linux/mlx5/driver.h | 19 ------------------- 3 files changed, 5 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7c3c0d3aca37..996863381bc8 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -144,7 +144,6 @@ enum { static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, void __iomem *uar_page, - spinlock_t *doorbell_lock, u32 cons_index) { __be32 doorbell[2]; @@ -164,7 +163,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); doorbell[1] = cpu_to_be32(cq->cqn); - mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock); + mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL); } int mlx5_init_cq_table(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h index afc78a3f4462..0787de28f2fc 100644 --- a/include/linux/mlx5/doorbell.h +++ b/include/linux/mlx5/doorbell.h @@ -68,10 +68,12 @@ static inline void mlx5_write64(__be32 val[2], void __iomem *dest, { unsigned long flags; - spin_lock_irqsave(doorbell_lock, flags); + if (doorbell_lock) + spin_lock_irqsave(doorbell_lock, flags); __raw_writel((__force u32) val[0], dest); __raw_writel((__force u32) val[1], dest + 4); - spin_unlock_irqrestore(doorbell_lock, flags); + if (doorbell_lock) + spin_unlock_irqrestore(doorbell_lock, flags); } #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a3a0954855b..bb362f506a2e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -203,23 +203,6 @@ struct mlx5_bfreg_info { u32 ver; }; -struct mlx5_bf { - void __iomem *reg; - void __iomem *regreg; - int buf_size; - struct mlx5_uar *uar; - unsigned long offset; - int need_lock; - /* protect blue flame buffer selection when needed - */ - spinlock_t lock; - - /* serialize 64 bit writes when done as two 32 bit accesses - */ - spinlock_t lock32; - int bfregn; -}; - struct mlx5_cmd_first { __be32 data[4]; }; @@ -612,8 +595,6 @@ struct mlx5_priv { struct mlx5_eq_table eq_table; struct msix_entry *msix_arr; struct mlx5_irq_info *irq_info; - struct mlx5_bfreg_info bfregi; - MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); /* pages stuff */ struct workqueue_struct *pg_wq; -- cgit v1.2.3 From b037c29a8056b8e896c4e084ba7cc30d6a1f165f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:26 +0200 Subject: IB/mlx5: Allow future extension of libmlx5 input data Current check requests that new fields in struct mlx5_ib_alloc_ucontext_req_v2 that are not known to the driver be zero. This was introduced so new libraries passing additional information to the kernel through struct mlx5_ib_alloc_ucontext_req_v2 will be notified by old kernels that do not support their request by failing the operation. This schecme is problematic since it requires libmlx5 to issue the requests with descending input size for struct mlx5_ib_alloc_ucontext_req_v2. To avoid this, we require that new features that will obey the following rules: If the feature requires one or more fields in the response and the at least one of the fields can be encoded such that a zero value means the kernel ignored the request then this field will provide the indication to the library. If no response is required or if zero is a valid response, a new field should be added that indicates to the library whether its request was processed. Fixes: b368d7cb8ceb ('IB/mlx5: Add hca_core_clock_offset to udata in init_ucontext') Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 12 +++++++----- include/linux/mlx5/driver.h | 12 ++++-------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index db1b9287012f..dd345e8cf6f0 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -211,6 +211,11 @@ enum { MLX5_EN_WR = (u64)2 }; +enum { + MLX5_ADAPTER_PAGE_SHIFT = 12, + MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, +}; + enum { MLX5_BFREGS_PER_UAR = 4, MLX5_MAX_UARS = 1 << 8, @@ -219,6 +224,8 @@ enum { MLX5_NON_FP_BFREGS_PER_UAR, MLX5_MAX_BFREGS = MLX5_MAX_UARS * MLX5_NON_FP_BFREGS_PER_UAR, + MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, + MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE, }; enum { @@ -391,11 +398,6 @@ enum { MLX5_MAX_PAGE_SHIFT = 31 }; -enum { - MLX5_ADAPTER_PAGE_SHIFT = 12, - MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT, -}; - enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bb362f506a2e..7e7394fef835 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -189,18 +189,17 @@ enum mlx5_eq_type { }; struct mlx5_bfreg_info { - struct mlx5_uar *uars; - int num_uars; + u32 *sys_pages; int num_low_latency_bfregs; - unsigned long *bitmap; unsigned int *count; - struct mlx5_bf *bfs; /* * protect bfreg allocation data structs */ struct mutex lock; u32 ver; + bool lib_uar_4k; + u32 num_sys_pages; }; struct mlx5_cmd_first { @@ -470,13 +469,10 @@ struct mlx5_sq_bfreg { struct mlx5_uar { u32 index; - struct list_head bf_list; - unsigned free_bf_bmap; - void __iomem *bf_map; void __iomem *map; + void __iomem *bf_map; }; - struct mlx5_core_health { struct health_buffer __iomem *health; __be32 __iomem *health_counter; -- cgit v1.2.3 From 30aa60b3bd12bd79b5324b7b595bd3446ab24b52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 3 Jan 2017 23:55:27 +0200 Subject: IB/mlx5: Support 4k UAR for libmlx5 Add fields to structs to convey to kernel an indication whether the library supports multi UARs per page and return to the library the size of a UAR based on the queried value. Signed-off-by: Eli Cohen Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/linux/mlx5/cq.h | 2 +- include/linux/mlx5/driver.h | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 996863381bc8..95898847c7d4 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -42,13 +42,13 @@ struct mlx5_core_cq { int cqe_sz; __be32 *set_ci_db; __be32 *arm_db; + struct mlx5_uars_page *uar; atomic_t refcount; struct completion free; unsigned vector; unsigned int irqn; void (*comp) (struct mlx5_core_cq *); void (*event) (struct mlx5_core_cq *, enum mlx5_event); - struct mlx5_uar *uar; u32 cons_index; unsigned arm_sn; struct mlx5_rsc_debug *dbg; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7e7394fef835..10e632588cd5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -467,12 +467,6 @@ struct mlx5_sq_bfreg { unsigned int offset; }; -struct mlx5_uar { - u32 index; - void __iomem *map; - void __iomem *bf_map; -}; - struct mlx5_core_health { struct health_buffer __iomem *health; __be32 __iomem *health_counter; @@ -725,7 +719,6 @@ struct mlx5_td { }; struct mlx5e_resources { - struct mlx5_uar cq_uar; u32 pdn; struct mlx5_td td; struct mlx5_core_mkey mkey; @@ -915,11 +908,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); -int mlx5_alloc_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi); -int mlx5_free_bfregs(struct mlx5_core_dev *dev, struct mlx5_bfreg_info *bfregi); -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, - bool map_wc); -void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 2c956a60778cbb6a27e0c7a8a52a91378c90e1d1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 8 Jan 2017 13:54:00 +0100 Subject: siphash: add cryptographically secure PRF SipHash is a 64-bit keyed hash function that is actually a cryptographically secure PRF, like HMAC. Except SipHash is super fast, and is meant to be used as a hashtable keyed lookup function, or as a general PRF for short input use cases, such as sequence numbers or RNG chaining. For the first usage: There are a variety of attacks known as "hashtable poisoning" in which an attacker forms some data such that the hash of that data will be the same, and then preceeds to fill up all entries of a hashbucket. This is a realistic and well-known denial-of-service vector. Currently hashtables use jhash, which is fast but not secure, and some kind of rotating key scheme (or none at all, which isn't good). SipHash is meant as a replacement for jhash in these cases. There are a modicum of places in the kernel that are vulnerable to hashtable poisoning attacks, either via userspace vectors or network vectors, and there's not a reliable mechanism inside the kernel at the moment to fix it. The first step toward fixing these issues is actually getting a secure primitive into the kernel for developers to use. Then we can, bit by bit, port things over to it as deemed appropriate. While SipHash is extremely fast for a cryptographically secure function, it is likely a bit slower than the insecure jhash, and so replacements will be evaluated on a case-by-case basis based on whether or not the difference in speed is negligible and whether or not the current jhash usage poses a real security risk. For the second usage: A few places in the kernel are using MD5 or SHA1 for creating secure sequence numbers, syn cookies, port numbers, or fast random numbers. SipHash is a faster and more fitting, and more secure replacement for MD5 in those situations. Replacing MD5 and SHA1 with SipHash for these uses is obvious and straight-forward, and so is submitted along with this patch series. There shouldn't be much of a debate over its efficacy. Dozens of languages are already using this internally for their hash tables and PRFs. Some of the BSDs already use this in their kernels. SipHash is a widely known high-speed solution to a widely known set of problems, and it's time we catch-up. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Cc: Linus Torvalds Cc: Eric Biggers Cc: David Laight Cc: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/siphash.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 include/linux/siphash.h (limited to 'include/linux') diff --git a/include/linux/siphash.h b/include/linux/siphash.h new file mode 100644 index 000000000000..feeb29cd113e --- /dev/null +++ b/include/linux/siphash.h @@ -0,0 +1,85 @@ +/* Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#ifndef _LINUX_SIPHASH_H +#define _LINUX_SIPHASH_H + +#include +#include + +#define SIPHASH_ALIGNMENT __alignof__(u64) +typedef struct { + u64 key[2]; +} siphash_key_t; + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); +#endif + +u64 siphash_1u64(const u64 a, const siphash_key_t *key); +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +u64 siphash_3u64(const u64 a, const u64 b, const u64 c, + const siphash_key_t *key); +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, + const siphash_key_t *key); +u64 siphash_1u32(const u32 a, const siphash_key_t *key); +u64 siphash_3u32(const u32 a, const u32 b, const u32 c, + const siphash_key_t *key); + +static inline u64 siphash_2u32(const u32 a, const u32 b, + const siphash_key_t *key) +{ + return siphash_1u64((u64)b << 32 | a, key); +} +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, + const u32 d, const siphash_key_t *key) +{ + return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); +} + + +static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + const siphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return siphash_1u32(le32_to_cpup((const __le32 *)data), key); + if (__builtin_constant_p(len) && len == 8) + return siphash_1u64(le64_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 16) + return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 24) + return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 32) + return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), le64_to_cpu(data[3]), + key); + return __siphash_aligned(data, len, key); +} + +/** + * siphash - compute 64-bit siphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the siphash key + */ +static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); +#endif + return ___siphash_aligned(data, len, key); +} + +#endif /* _LINUX_SIPHASH_H */ -- cgit v1.2.3 From 1ae2324f732c9c4e2fa4ebd885fa1001b70d52e1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 8 Jan 2017 13:54:01 +0100 Subject: siphash: implement HalfSipHash1-3 for hash tables HalfSipHash, or hsiphash, is a shortened version of SipHash, which generates 32-bit outputs using a weaker 64-bit key. It has *much* lower security margins, and shouldn't be used for anything too sensitive, but it could be used as a hashtable key function replacement, if the output is never exposed, and if the security requirement is not too high. The goal is to make this something that performance-critical jhash users would be willing to use. On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so we alias SipHash1-3 to HalfSipHash1-3 on those systems. 64-bit x86_64: [ 0.509409] test_siphash: SipHash2-4 cycles: 4049181 [ 0.510650] test_siphash: SipHash1-3 cycles: 2512884 [ 0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920 [ 0.512904] test_siphash: JenkinsHash cycles: 978267 So, we map hsiphash() -> SipHash1-3 32-bit x86: [ 0.509868] test_siphash: SipHash2-4 cycles: 14812892 [ 0.513601] test_siphash: SipHash1-3 cycles: 9510710 [ 0.515263] test_siphash: HalfSipHash1-3 cycles: 3856157 [ 0.515952] test_siphash: JenkinsHash cycles: 1148567 So, we map hsiphash() -> HalfSipHash1-3 hsiphash() is roughly 3 times slower than jhash(), but comes with a considerable security improvement. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Signed-off-by: David S. Miller --- include/linux/siphash.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/siphash.h b/include/linux/siphash.h index feeb29cd113e..fa7a6b9cedbf 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -5,7 +5,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #ifndef _LINUX_SIPHASH_H @@ -82,4 +84,57 @@ static inline u64 siphash(const void *data, size_t len, return ___siphash_aligned(data, len, key); } +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long) +typedef struct { + unsigned long key[2]; +} hsiphash_key_t; + +u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); +#endif + +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, + const hsiphash_key_t *key); +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, + const hsiphash_key_t *key); + +static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, + const hsiphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return hsiphash_1u32(le32_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 8) + return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 12) + return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 16) + return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), le32_to_cpu(data[3]), + key); + return __hsiphash_aligned(data, len, key); +} + +/** + * hsiphash - compute 32-bit hsiphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the hsiphash key + */ +static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); +#endif + return ___hsiphash_aligned(data, len, key); +} + #endif /* _LINUX_SIPHASH_H */ -- cgit v1.2.3 From b4b7b772e8b018286482d8d1fba7804ceac56a64 Mon Sep 17 00:00:00 2001 From: jpinto Date: Mon, 9 Jan 2017 12:35:08 +0000 Subject: stmmac: adding DT parameter for LPI tx clock gating This patch adds a new parameter to the stmmac DT: snps,en-tx-lpi-clockgating. It was ported from synopsys/dwc_eth_qos.c and it is useful if lpi tx clock gating is needed by stmmac users also. Signed-off-by: Joao Pinto Tested-by: Niklas Cassel Reviewed-by: Lars Persson Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 889e0e9a3f1c..e3cd7588623d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -142,5 +142,6 @@ struct plat_stmmacenet_data { int has_gmac4; bool tso_en; int mac_port_sel_speed; + bool en_tx_lpi_clockgating; }; #endif -- cgit v1.2.3 From f573c0b9c4e02691cf87736bd0824fd37ec02e65 Mon Sep 17 00:00:00 2001 From: jpinto Date: Mon, 9 Jan 2017 12:35:09 +0000 Subject: stmmac: move stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to platform structure This patch moves stmmac_clk, pclk, clk_ptp_ref and stmmac_rst to the plat_stmmacenet_data structure. It also moves these platform variables initialization to stmmac_platform. This was done for two reasons: a) If PCI is used, platform related code is being executed in stmmac_main resulting in warnings that have no sense and conceptually was not right b) stmmac as a synopsys reference ethernet driver stack will be hosting more and more drivers to its structure like synopsys/dwc_eth_qos.c. These drivers have their own DT bindings that are not compatible with stmmac's. One of the most important are the clock names, and so they need to be parsed in the glue logic and initialized there, and that is the main reason why the clocks were passed to the platform structure. Signed-off-by: Joao Pinto Tested-by: Niklas Cassel Reviewed-by: Lars Persson Acked-by: Alexandre TORGUE Signed-off-by: David S. Miller --- include/linux/stmmac.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index e3cd7588623d..d76033d6726d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -138,6 +138,11 @@ struct plat_stmmacenet_data { int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; + struct clk *stmmac_clk; + struct clk *pclk; + struct clk *clk_ptp_ref; + unsigned int clk_ptp_rate; + struct reset_control *stmmac_rst; struct stmmac_axi *axi; int has_gmac4; bool tso_en; -- cgit v1.2.3 From ac7138746e14137a451f8539614cdd349153e0c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Jan 2017 16:55:13 +0100 Subject: smc: establish new socket family * enable smc module loading and unloading * register new socket family * basic smc socket creation and deletion * use backing TCP socket to run CLC (Connection Layer Control) handshake of SMC protocol * Setup for infiniband traffic is implemented in follow-on patches. For now fallback to TCP socket is always used. Signed-off-by: Ursula Braun Reviewed-by: Utz Bacher Signed-off-by: David S. Miller --- include/linux/socket.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index c06438023d79..082027457825 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -202,8 +202,12 @@ struct ucred { #define AF_VSOCK 40 /* vSockets */ #define AF_KCM 41 /* Kernel Connection Multiplexor*/ #define AF_QIPCRTR 42 /* Qualcomm IPC Router */ +#define AF_SMC 43 /* smc sockets: reserve number for + * PF_SMC protocol family that + * reuses AF_INET address family + */ -#define AF_MAX 43 /* For now.. */ +#define AF_MAX 44 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -251,6 +255,7 @@ struct ucred { #define PF_VSOCK AF_VSOCK #define PF_KCM AF_KCM #define PF_QIPCRTR AF_QIPCRTR +#define PF_SMC AF_SMC #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ -- cgit v1.2.3 From 39f19ebbf57b403695f7b5f9cf322fe1ddb5d7fb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 9 Jan 2017 10:19:50 -0800 Subject: bpf: rename ARG_PTR_TO_STACK since ARG_PTR_TO_STACK is no longer just pointer to stack rename it to ARG_PTR_TO_MEM and adjust comment. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f74ae68086dc..94ea8d2383e6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -69,14 +69,14 @@ enum bpf_arg_type { /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack */ - ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ - ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not - * need to be initialized, helper function must fill - * all bytes or clear them in error case. + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, + * helper function must fill all bytes or clear + * them in error case. */ - ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ - ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ + ARG_CONST_SIZE, /* number of bytes accessed from memory */ + ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ -- cgit v1.2.3 From aecb57da7ae9df1f9c2eb11788888b135ddc0203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vesa=20J=C3=A4=C3=A4skel=C3=A4inen?= Date: Fri, 23 Dec 2016 13:15:58 +0200 Subject: rtc: tps65910: Add RTC calibration support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Texas Instrument's TPS65910 has support for compensating RTC crystal inaccuracies. When enabled every hour RTC counter value will be compensated with two's complement value. Signed-off-by: Vesa Jääskeläinen Acked-by: Lee Jones Signed-off-by: Alexandre Belloni --- include/linux/mfd/tps65910.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 6483a6fdce59..ffb21e79204d 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -134,6 +134,7 @@ /* RTC_CTRL_REG bitfields */ #define TPS65910_RTC_CTRL_STOP_RTC 0x01 /*0=stop, 1=run */ +#define TPS65910_RTC_CTRL_AUTO_COMP 0x04 #define TPS65910_RTC_CTRL_GET_TIME 0x40 /* RTC_STATUS_REG bitfields */ -- cgit v1.2.3 From b8aa8453918ebfd93d78de56c2afd4b735e02e27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 22 Dec 2016 00:32:25 +0100 Subject: security: Fix inode_getattr documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace arguments @mnt and @dentry with @path. Signed-off-by: Mickaël Salaün Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 558adfa5c8a8..9cf50ad2fe20 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -352,8 +352,7 @@ * Return 0 if permission is granted. * @inode_getattr: * Check permission before obtaining file attributes. - * @mnt is the vfsmount where the dentry was looked up - * @dentry contains the dentry structure for the file. + * @path contains the path structure for the file. * Return 0 if permission is granted. * @inode_setxattr: * Check permission before setting the extended attributes -- cgit v1.2.3 From 989e0aac1a801e9e9580632c9fd448a7aaca596a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 30 Dec 2016 15:01:17 +0100 Subject: ata: pass queued command to ->sff_data_xfer method For Atari Falcon PATA support we need to check the current command in its ->sff_data_xfer method. Update core code and all users accordingly. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo --- include/linux/libata.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c170be548b7f..0e8a8000b45f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -968,7 +968,7 @@ struct ata_port_operations { void (*sff_tf_read)(struct ata_port *ap, struct ata_taskfile *tf); void (*sff_exec_command)(struct ata_port *ap, const struct ata_taskfile *tf); - unsigned int (*sff_data_xfer)(struct ata_device *dev, + unsigned int (*sff_data_xfer)(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); void (*sff_irq_on)(struct ata_port *); bool (*sff_irq_check)(struct ata_port *); @@ -1823,11 +1823,11 @@ extern void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf); extern void ata_sff_exec_command(struct ata_port *ap, const struct ata_taskfile *tf); -extern unsigned int ata_sff_data_xfer(struct ata_device *dev, +extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); -extern unsigned int ata_sff_data_xfer32(struct ata_device *dev, +extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); -extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, +extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); -- cgit v1.2.3 From 39d3e7584a686541a3295ff1624d341e669e1afc Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 10 Jan 2017 00:02:13 +0000 Subject: rdmacg: Added rdma cgroup controller Added rdma cgroup controller that does accounting, limit enforcement on rdma/IB resources. Added rdma cgroup header file which defines its APIs to perform charging/uncharging functionality. It also defined APIs for RDMA/IB stack for device registration. Devices which are registered will participate in controller functions of accounting and limit enforcements. It define rdmacg_device structure to bind IB stack and RDMA cgroup controller. RDMA resources are tracked using resource pool. Resource pool is per device, per cgroup entity which allows setting up accounting limits on per device basis. Currently resources are defined by the RDMA cgroup. Resource pool is created/destroyed dynamically whenever charging/uncharging occurs respectively and whenever user configuration is done. Its a tradeoff of memory vs little more code space that creates resource pool object whenever necessary, instead of creating them during cgroup creation and device registration time. Signed-off-by: Parav Pandit Signed-off-by: Tejun Heo --- include/linux/cgroup_rdma.h | 53 +++++++++++++++++++++++++++++++++++++++++++ include/linux/cgroup_subsys.h | 4 ++++ 2 files changed, 57 insertions(+) create mode 100644 include/linux/cgroup_rdma.h (limited to 'include/linux') diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h new file mode 100644 index 000000000000..e94290b29e99 --- /dev/null +++ b/include/linux/cgroup_rdma.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2016 Parav Pandit + * + * This file is subject to the terms and conditions of version 2 of the GNU + * General Public License. See the file COPYING in the main directory of the + * Linux distribution for more details. + */ + +#ifndef _CGROUP_RDMA_H +#define _CGROUP_RDMA_H + +#include + +enum rdmacg_resource_type { + RDMACG_RESOURCE_HCA_HANDLE, + RDMACG_RESOURCE_HCA_OBJECT, + RDMACG_RESOURCE_MAX, +}; + +#ifdef CONFIG_CGROUP_RDMA + +struct rdma_cgroup { + struct cgroup_subsys_state css; + + /* + * head to keep track of all resource pools + * that belongs to this cgroup. + */ + struct list_head rpools; +}; + +struct rdmacg_device { + struct list_head dev_node; + struct list_head rpools; + char *name; +}; + +/* + * APIs for RDMA/IB stack to publish when a device wants to + * participate in resource accounting + */ +int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_unregister_device(struct rdmacg_device *device); + +/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ +int rdmacg_try_charge(struct rdma_cgroup **rdmacg, + struct rdmacg_device *device, + enum rdmacg_resource_type index); +void rdmacg_uncharge(struct rdma_cgroup *cg, + struct rdmacg_device *device, + enum rdmacg_resource_type index); +#endif /* CONFIG_CGROUP_RDMA */ +#endif /* _CGROUP_RDMA_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0df0336acee9..d0e597c44585 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -56,6 +56,10 @@ SUBSYS(hugetlb) SUBSYS(pids) #endif +#if IS_ENABLED(CONFIG_CGROUP_RDMA) +SUBSYS(rdma) +#endif + /* * The following subsystems are not supported on the default hierarchy. */ -- cgit v1.2.3 From 78c9981f6132cb600d545007c91e300021b7caf3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:24 +0000 Subject: iio:buffer: Stop exporting iio_update_demux Nothing outside of indiustrialio-buffer.c should be using this. Requires a large amount of juggling of functions to avoid a forward definition. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 70a5164f4728..889d0f2f5d7b 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -168,8 +168,6 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, return iio_push_to_buffers(indio_dev, data); } -int iio_update_demux(struct iio_dev *indio_dev); - bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, const unsigned long *mask); -- cgit v1.2.3 From 263cf5e6577a779cc3311c5290325ef3917de2ea Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:25 +0000 Subject: iio:buffer.h Reformat structure comments to be inline. This should make it easier to see how the structure is split into public and private parts - reflected in the generated documentation. Deliberately use /* instead of /** for the private elements to avoid warnings when kernel-doc script runs. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 100 ++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 889d0f2f5d7b..4a65a7bb40a4 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -74,45 +74,71 @@ struct iio_buffer_access_funcs { /** * struct iio_buffer - general buffer structure - * @length: [DEVICE] number of datums in buffer - * @bytes_per_datum: [DEVICE] size of individual datum including timestamp - * @scan_el_attrs: [DRIVER] control of scan elements if that scan mode - * control method is used - * @scan_mask: [INTERN] bitmask used in masking scan mode elements - * @scan_timestamp: [INTERN] does the scan mode include a timestamp - * @access: [DRIVER] buffer access functions associated with the - * implementation. - * @scan_el_dev_attr_list:[INTERN] list of scan element related attributes. - * @buffer_group: [INTERN] attributes of the buffer group - * @scan_el_group: [DRIVER] attribute group for those attributes not - * created from the iio_chan_info array. - * @pollq: [INTERN] wait queue to allow for polling on the buffer. - * @stufftoread: [INTERN] flag to indicate new data. - * @attrs: [INTERN] standard attributes of the buffer - * @demux_list: [INTERN] list of operations required to demux the scan. - * @demux_bounce: [INTERN] buffer for doing gather from incoming scan. - * @buffer_list: [INTERN] entry in the devices list of current buffers. - * @ref: [INTERN] reference count of the buffer. - * @watermark: [INTERN] number of datums to wait for poll/read. + * + * Note that the internals of this structure should only be of interest to + * those writing new buffer implementations. */ struct iio_buffer { - int length; - int bytes_per_datum; - struct attribute_group *scan_el_attrs; - long *scan_mask; - bool scan_timestamp; - const struct iio_buffer_access_funcs *access; - struct list_head scan_el_dev_attr_list; - struct attribute_group buffer_group; - struct attribute_group scan_el_group; - wait_queue_head_t pollq; - bool stufftoread; - const struct attribute **attrs; - struct list_head demux_list; - void *demux_bounce; - struct list_head buffer_list; - struct kref ref; - unsigned int watermark; + /** @length: Number of datums in buffer. */ + int length; + + /** @bytes_per_datum: Size of individual datum including timestamp. */ + int bytes_per_datum; + + /** + * @access: Buffer access functions associated with the + * implementation. + */ + const struct iio_buffer_access_funcs *access; + + /** @scan_mask: Bitmask used in masking scan mode elements. */ + long *scan_mask; + + /** @demux_list: List of operations required to demux the scan. */ + struct list_head demux_list; + + /** @pollq: Wait queue to allow for polling on the buffer. */ + wait_queue_head_t pollq; + + /** @watermark: Number of datums to wait for poll/read. */ + unsigned int watermark; + + /* private: */ + /* + * @scan_el_attrs: Control of scan elements if that scan mode + * control method is used. + */ + struct attribute_group *scan_el_attrs; + + /* @scan_timestamp: Does the scan mode include a timestamp. */ + bool scan_timestamp; + + /* @scan_el_dev_attr_list: List of scan element related attributes. */ + struct list_head scan_el_dev_attr_list; + + /* @buffer_group: Attributes of the buffer group. */ + struct attribute_group buffer_group; + + /* + * @scan_el_group: Attribute group for those attributes not + * created from the iio_chan_info array. + */ + struct attribute_group scan_el_group; + + /* @stufftoread: Flag to indicate new data. */ + bool stufftoread; + + /* @attrs: Standard attributes of the buffer. */ + const struct attribute **attrs; + + /* @demux_bounce: Buffer for doing gather from incoming scan. */ + void *demux_bounce; + + /* @buffer_list: Entry in the devices list of current buffers. */ + struct list_head buffer_list; + + /* @ref: Reference count of the buffer. */ + struct kref ref; }; /** -- cgit v1.2.3 From 9f4667776c138df33c4107fcd8811aa9cb6cdcbe Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:26 +0000 Subject: iio:buffer: Introduced a function to assign the buffer specific attrs. This is a necessary step in taking the buffer implementation opaque. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 4a65a7bb40a4..30ea9806db67 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -17,6 +17,8 @@ struct iio_buffer; +void iio_buffer_set_attrs(struct iio_buffer *buffer, + const struct attribute **attrs); /** * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be * configured. It has a fixed value which will be buffer specific. -- cgit v1.2.3 From c2bf8d5f3262b3942bf923ef3b86d6ebe590821d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:27 +0000 Subject: iio:buffer: Stop exporting iio_scan_mask_query Nothing uses it outside of core code. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 30ea9806db67..635aa87eb5e9 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -161,9 +161,6 @@ int iio_update_buffers(struct iio_dev *indio_dev, **/ void iio_buffer_init(struct iio_buffer *buffer); -int iio_scan_mask_query(struct iio_dev *indio_dev, - struct iio_buffer *buffer, int bit); - /** * iio_push_to_buffers() - push to a registered buffer. * @indio_dev: iio_dev structure for device. -- cgit v1.2.3 From 315a19eca0e7cbae1bef7f43b36fdcfc33f248f6 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:28 +0000 Subject: iio:buffers: Push some docs down into the .c file. Ancient legacy of me doing it wrong which it is nice to clear up whilst we are here. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 635aa87eb5e9..cd77ed14eb7f 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -161,11 +161,6 @@ int iio_update_buffers(struct iio_dev *indio_dev, **/ void iio_buffer_init(struct iio_buffer *buffer); -/** - * iio_push_to_buffers() - push to a registered buffer. - * @indio_dev: iio_dev structure for device. - * @data: Full scan. - */ int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); /* -- cgit v1.2.3 From d4ad4f4b721ad76e28b73e32b8602c011e281893 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:29 +0000 Subject: iio:buffer:iio_push_to_buffers_with_timestamp fix kernel-doc. Wrong start of kernel doc comment /* -> /** Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index cd77ed14eb7f..8c915c2c18f1 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -163,7 +163,7 @@ void iio_buffer_init(struct iio_buffer *buffer); int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); -/* +/** * iio_push_to_buffers_with_timestamp() - push data and timestamp to buffers * @indio_dev: iio_dev structure for device. * @data: sample data -- cgit v1.2.3 From 8abd5ba53962854c3a1c21d04fa6fdba54cc0ee1 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:30 +0000 Subject: iio:kfifo_buf header include push down. As a precursor to splitting buffer.h, lets make sure all drivers include the relevant headers rather than relying on picking them up from kfifo_buf.h. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/kfifo_buf.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h index 1683bc710d14..027cfa9c3703 100644 --- a/include/linux/iio/kfifo_buf.h +++ b/include/linux/iio/kfifo_buf.h @@ -1,9 +1,8 @@ #ifndef __LINUX_IIO_KFIFO_BUF_H__ #define __LINUX_IIO_KFIFO_BUF_H__ -#include -#include -#include +struct iio_buffer; +struct device; struct iio_buffer *iio_kfifo_allocate(void); void iio_kfifo_free(struct iio_buffer *r); -- cgit v1.2.3 From 2b827ad54111439b919605da90e122ecda88bad6 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:32 +0000 Subject: iio:buffer: Push implementation of iio_device_attach_buffer into .c file This is a precursor to the splitting of buffer.h into parts relevant to buffer implementation vs those for devices using buffers. struct buffer is about to become opaque as far as the header is concerned. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 8c915c2c18f1..5cdfe2b7d674 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -194,20 +194,8 @@ bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer); void iio_buffer_put(struct iio_buffer *buffer); -/** - * iio_device_attach_buffer - Attach a buffer to a IIO device - * @indio_dev: The device the buffer should be attached to - * @buffer: The buffer to attach to the device - * - * This function attaches a buffer to a IIO device. The buffer stays attached to - * the device until the device is freed. The function should only be called at - * most once per device. - */ -static inline void iio_device_attach_buffer(struct iio_dev *indio_dev, - struct iio_buffer *buffer) -{ - indio_dev->buffer = iio_buffer_get(buffer); -} +void iio_device_attach_buffer(struct iio_dev *indio_dev, + struct iio_buffer *buffer); #else /* CONFIG_IIO_BUFFER */ -- cgit v1.2.3 From 33dd94cb972175249258329c4aaffddcc82c2005 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 2 Jan 2017 19:28:34 +0000 Subject: iio:buffer.h - split into buffer.h and buffer_impl.h buffer.h supplies everything needed for devices using buffers. buffer_impl.h supplies access to the internals as needed to write a buffer implementation. This was really motivated by the mess that turned up in the kernel-doc documentation pulled in by the new sphinx docs. It made it clear that our logical separations in headers were generally terrible. The buffer case was easy to sort out without greatly effecting drivers so here it is. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/buffer.h | 156 +------------------------------------- include/linux/iio/buffer_impl.h | 162 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 155 deletions(-) create mode 100644 include/linux/iio/buffer_impl.h (limited to 'include/linux') diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 5cdfe2b7d674..48767c776119 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -11,155 +11,11 @@ #define _IIO_BUFFER_GENERIC_H_ #include #include -#include - -#ifdef CONFIG_IIO_BUFFER struct iio_buffer; void iio_buffer_set_attrs(struct iio_buffer *buffer, const struct attribute **attrs); -/** - * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be - * configured. It has a fixed value which will be buffer specific. - */ -#define INDIO_BUFFER_FLAG_FIXED_WATERMARK BIT(0) - -/** - * struct iio_buffer_access_funcs - access functions for buffers. - * @store_to: actually store stuff to the buffer - * @read_first_n: try to get a specified number of bytes (must exist) - * @data_available: indicates how much data is available for reading from - * the buffer. - * @request_update: if a parameter change has been marked, update underlying - * storage. - * @set_bytes_per_datum:set number of bytes per datum - * @set_length: set number of datums in buffer - * @enable: called if the buffer is attached to a device and the - * device starts sampling. Calls are balanced with - * @disable. - * @disable: called if the buffer is attached to a device and the - * device stops sampling. Calles are balanced with @enable. - * @release: called when the last reference to the buffer is dropped, - * should free all resources allocated by the buffer. - * @modes: Supported operating modes by this buffer type - * @flags: A bitmask combination of INDIO_BUFFER_FLAG_* - * - * The purpose of this structure is to make the buffer element - * modular as event for a given driver, different usecases may require - * different buffer designs (space efficiency vs speed for example). - * - * It is worth noting that a given buffer implementation may only support a - * small proportion of these functions. The core code 'should' cope fine with - * any of them not existing. - **/ -struct iio_buffer_access_funcs { - int (*store_to)(struct iio_buffer *buffer, const void *data); - int (*read_first_n)(struct iio_buffer *buffer, - size_t n, - char __user *buf); - size_t (*data_available)(struct iio_buffer *buffer); - - int (*request_update)(struct iio_buffer *buffer); - - int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd); - int (*set_length)(struct iio_buffer *buffer, int length); - - int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); - int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); - - void (*release)(struct iio_buffer *buffer); - - unsigned int modes; - unsigned int flags; -}; - -/** - * struct iio_buffer - general buffer structure - * - * Note that the internals of this structure should only be of interest to - * those writing new buffer implementations. - */ -struct iio_buffer { - /** @length: Number of datums in buffer. */ - int length; - - /** @bytes_per_datum: Size of individual datum including timestamp. */ - int bytes_per_datum; - - /** - * @access: Buffer access functions associated with the - * implementation. - */ - const struct iio_buffer_access_funcs *access; - - /** @scan_mask: Bitmask used in masking scan mode elements. */ - long *scan_mask; - - /** @demux_list: List of operations required to demux the scan. */ - struct list_head demux_list; - - /** @pollq: Wait queue to allow for polling on the buffer. */ - wait_queue_head_t pollq; - - /** @watermark: Number of datums to wait for poll/read. */ - unsigned int watermark; - - /* private: */ - /* - * @scan_el_attrs: Control of scan elements if that scan mode - * control method is used. - */ - struct attribute_group *scan_el_attrs; - - /* @scan_timestamp: Does the scan mode include a timestamp. */ - bool scan_timestamp; - - /* @scan_el_dev_attr_list: List of scan element related attributes. */ - struct list_head scan_el_dev_attr_list; - - /* @buffer_group: Attributes of the buffer group. */ - struct attribute_group buffer_group; - - /* - * @scan_el_group: Attribute group for those attributes not - * created from the iio_chan_info array. - */ - struct attribute_group scan_el_group; - - /* @stufftoread: Flag to indicate new data. */ - bool stufftoread; - - /* @attrs: Standard attributes of the buffer. */ - const struct attribute **attrs; - - /* @demux_bounce: Buffer for doing gather from incoming scan. */ - void *demux_bounce; - - /* @buffer_list: Entry in the devices list of current buffers. */ - struct list_head buffer_list; - - /* @ref: Reference count of the buffer. */ - struct kref ref; -}; - -/** - * iio_update_buffers() - add or remove buffer from active list - * @indio_dev: device to add buffer to - * @insert_buffer: buffer to insert - * @remove_buffer: buffer_to_remove - * - * Note this will tear down the all buffering and build it up again - */ -int iio_update_buffers(struct iio_dev *indio_dev, - struct iio_buffer *insert_buffer, - struct iio_buffer *remove_buffer); - -/** - * iio_buffer_init() - Initialize the buffer structure - * @buffer: buffer to be initialized - **/ -void iio_buffer_init(struct iio_buffer *buffer); int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data); @@ -189,19 +45,9 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, } bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev, - const unsigned long *mask); - -struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer); -void iio_buffer_put(struct iio_buffer *buffer); + const unsigned long *mask); void iio_device_attach_buffer(struct iio_dev *indio_dev, struct iio_buffer *buffer); -#else /* CONFIG_IIO_BUFFER */ - -static inline void iio_buffer_get(struct iio_buffer *buffer) {} -static inline void iio_buffer_put(struct iio_buffer *buffer) {} - -#endif /* CONFIG_IIO_BUFFER */ - #endif /* _IIO_BUFFER_GENERIC_H_ */ diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h new file mode 100644 index 000000000000..8daba198fafa --- /dev/null +++ b/include/linux/iio/buffer_impl.h @@ -0,0 +1,162 @@ +#ifndef _IIO_BUFFER_GENERIC_IMPL_H_ +#define _IIO_BUFFER_GENERIC_IMPL_H_ +#include +#include + +#ifdef CONFIG_IIO_BUFFER + +struct iio_dev; +struct iio_buffer; + +/** + * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be + * configured. It has a fixed value which will be buffer specific. + */ +#define INDIO_BUFFER_FLAG_FIXED_WATERMARK BIT(0) + +/** + * struct iio_buffer_access_funcs - access functions for buffers. + * @store_to: actually store stuff to the buffer + * @read_first_n: try to get a specified number of bytes (must exist) + * @data_available: indicates how much data is available for reading from + * the buffer. + * @request_update: if a parameter change has been marked, update underlying + * storage. + * @set_bytes_per_datum:set number of bytes per datum + * @set_length: set number of datums in buffer + * @enable: called if the buffer is attached to a device and the + * device starts sampling. Calls are balanced with + * @disable. + * @disable: called if the buffer is attached to a device and the + * device stops sampling. Calles are balanced with @enable. + * @release: called when the last reference to the buffer is dropped, + * should free all resources allocated by the buffer. + * @modes: Supported operating modes by this buffer type + * @flags: A bitmask combination of INDIO_BUFFER_FLAG_* + * + * The purpose of this structure is to make the buffer element + * modular as event for a given driver, different usecases may require + * different buffer designs (space efficiency vs speed for example). + * + * It is worth noting that a given buffer implementation may only support a + * small proportion of these functions. The core code 'should' cope fine with + * any of them not existing. + **/ +struct iio_buffer_access_funcs { + int (*store_to)(struct iio_buffer *buffer, const void *data); + int (*read_first_n)(struct iio_buffer *buffer, + size_t n, + char __user *buf); + size_t (*data_available)(struct iio_buffer *buffer); + + int (*request_update)(struct iio_buffer *buffer); + + int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd); + int (*set_length)(struct iio_buffer *buffer, int length); + + int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); + int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); + + void (*release)(struct iio_buffer *buffer); + + unsigned int modes; + unsigned int flags; +}; + +/** + * struct iio_buffer - general buffer structure + * + * Note that the internals of this structure should only be of interest to + * those writing new buffer implementations. + */ +struct iio_buffer { + /** @length: Number of datums in buffer. */ + int length; + + /** @bytes_per_datum: Size of individual datum including timestamp. */ + int bytes_per_datum; + + /** + * @access: Buffer access functions associated with the + * implementation. + */ + const struct iio_buffer_access_funcs *access; + + /** @scan_mask: Bitmask used in masking scan mode elements. */ + long *scan_mask; + + /** @demux_list: List of operations required to demux the scan. */ + struct list_head demux_list; + + /** @pollq: Wait queue to allow for polling on the buffer. */ + wait_queue_head_t pollq; + + /** @watermark: Number of datums to wait for poll/read. */ + unsigned int watermark; + + /* private: */ + /* + * @scan_el_attrs: Control of scan elements if that scan mode + * control method is used. + */ + struct attribute_group *scan_el_attrs; + + /* @scan_timestamp: Does the scan mode include a timestamp. */ + bool scan_timestamp; + + /* @scan_el_dev_attr_list: List of scan element related attributes. */ + struct list_head scan_el_dev_attr_list; + + /* @buffer_group: Attributes of the buffer group. */ + struct attribute_group buffer_group; + + /* + * @scan_el_group: Attribute group for those attributes not + * created from the iio_chan_info array. + */ + struct attribute_group scan_el_group; + + /* @stufftoread: Flag to indicate new data. */ + bool stufftoread; + + /* @attrs: Standard attributes of the buffer. */ + const struct attribute **attrs; + + /* @demux_bounce: Buffer for doing gather from incoming scan. */ + void *demux_bounce; + + /* @buffer_list: Entry in the devices list of current buffers. */ + struct list_head buffer_list; + + /* @ref: Reference count of the buffer. */ + struct kref ref; +}; + +/** + * iio_update_buffers() - add or remove buffer from active list + * @indio_dev: device to add buffer to + * @insert_buffer: buffer to insert + * @remove_buffer: buffer_to_remove + * + * Note this will tear down the all buffering and build it up again + */ +int iio_update_buffers(struct iio_dev *indio_dev, + struct iio_buffer *insert_buffer, + struct iio_buffer *remove_buffer); + +/** + * iio_buffer_init() - Initialize the buffer structure + * @buffer: buffer to be initialized + **/ +void iio_buffer_init(struct iio_buffer *buffer); + +struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer); +void iio_buffer_put(struct iio_buffer *buffer); + +#else /* CONFIG_IIO_BUFFER */ + +static inline void iio_buffer_get(struct iio_buffer *buffer) {} +static inline void iio_buffer_put(struct iio_buffer *buffer) {} + +#endif /* CONFIG_IIO_BUFFER */ +#endif /* _IIO_BUFFER_GENERIC_IMPL_H_ */ -- cgit v1.2.3 From ccb61f8a99e6c29df4fb96a65dad4fad740d5be9 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 22 Dec 2016 16:54:00 -0800 Subject: Drivers: hv: vmbus: Fix a rescind handling bug The host can rescind a channel that has been offered to the guest and once the channel is rescinded, the host does not respond to any requests on that channel. Deal with the case where the guest may be blocked waiting for a response from the host. Signed-off-by: K. Y. Srinivasan Cc: Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 42fe43fb0c80..7ea20bd7cdd1 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -627,6 +627,7 @@ struct vmbus_channel_msginfo { /* Synchronize the request/response if needed */ struct completion waitevent; + struct vmbus_channel *waiting_channel; union { struct vmbus_channel_version_supported version_supported; struct vmbus_channel_open_result open_result; -- cgit v1.2.3 From 874bcd00f520cac297aefade201c4efc07fc8d17 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 15 Dec 2016 20:36:33 +0100 Subject: apm-emulation: move APM_MINOR_DEV to include/linux/miscdevice.h This patch move the define for APM_MINOR_DEV to include/linux/miscdevice.h It is better that all minor number definitions are in the same place. Signed-off-by: Corentin Labbe Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index ed30d5d713e3..0590263c462c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -22,6 +22,7 @@ /*#define ADB_MOUSE_MINOR 10 FIXME OBSOLETE */ #define WATCHDOG_MINOR 130 /* Watchdog timer */ #define TEMP_MINOR 131 /* Temperature Sensor */ +#define APM_MINOR_DEV 134 #define RTC_MINOR 135 #define EFI_RTC_MINOR 136 /* EFI Time services */ #define VHCI_MINOR 137 -- cgit v1.2.3 From b0f2d7d546d37697d3f50753904f6f0c549b62bc Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Sat, 10 Dec 2016 23:10:41 +0000 Subject: VME: Remove node entry from vme_driver The vme_driver structure currently has a "node" entry. This entry is never used and it's intended purpose has been lost to the mists of time. Remove the entry from vme_driver to avoid confusion. Signed-off-by: Martyn Welch Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index 8c589176c2f8..ec5e8bf6118e 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -108,7 +108,6 @@ struct vme_dev { }; struct vme_driver { - struct list_head node; const char *name; int (*match)(struct vme_dev *); int (*probe)(struct vme_dev *); -- cgit v1.2.3 From e41746b0dd51f353630112558835aa62c1eb883e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Dec 2016 15:33:39 +0000 Subject: debugfs: improve formatting of debugfs_real_fops() Type of debugfs_real_fops() is longer than parameters and the name, so there is no way to break the declaration nicely. We have to go over 80 characters. Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 014cc564d1c4..7574e86ff1eb 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,8 +52,7 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). */ -static inline const struct file_operations * -debugfs_real_fops(const struct file *filp) +static inline const struct file_operations *debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* -- cgit v1.2.3 From a0244a8df15d8caee8831872b6fc97b676cb15bd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 15 Dec 2016 19:55:54 +0100 Subject: kref: prefer atomic_inc_not_zero to atomic_add_unless On most platforms, there exists this ifdef: #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) This makes this patch functionally useless. However, on PPC, there is actually an explicit definition of atomic_inc_not_zero with its own assembly that is slightly more optimized than atomic_add_unless. So, this patch changes kref to use atomic_inc_not_zero instead, for PPC and any future platforms that might provide an explicit implementation. This also puts this usage of kref more in line with a verbatim reading of the examples in Paul McKenney's paper [1] in the section titled "2.4 Atomic Counting With Check and Release Memory Barrier", which uses atomic_inc_not_zero. [1] http://open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2167.pdf Signed-off-by: Jason A. Donenfeld Reviewed-by: Thomas Hellstrom Reviewed-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- include/linux/kref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..62f0a84ae94e 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -133,6 +133,6 @@ static inline int kref_put_mutex(struct kref *kref, */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { - return atomic_add_unless(&kref->refcount, 1, 0); + return atomic_inc_not_zero(&kref->refcount); } #endif /* _KREF_H_ */ -- cgit v1.2.3 From bb5b06750f1d9b2d632d942d8a72b35a4dd930b9 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 4 Jan 2017 13:56:28 +0530 Subject: gpio: davinci: Remove redundant members davinci_gpio_controller stuct davinci_gpio_controller struct has set_data, in_data, clr_data members that are assigned and never used. Signed-off-by: Keerthy Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-davinci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index 6ace3fd32b6a..44ca5303849c 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -33,9 +33,6 @@ struct davinci_gpio_controller { /* Serialize access to GPIO registers */ spinlock_t lock; void __iomem *regs; - void __iomem *set_data; - void __iomem *clr_data; - void __iomem *in_data; int gpio_unbanked; unsigned gpio_irq; }; -- cgit v1.2.3 From 568c5fe5a54f2654f5a4c599c45b8a62ed9a2013 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:42 -0800 Subject: mm: Introduce lm_alias Certain architectures may have the kernel image mapped separately to alias the linear map. Introduce a macro lm_alias to translate a kernel image symbol into its linear alias. This is used in part with work to add CONFIG_DEBUG_VIRTUAL support for arm64. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fe6b4036664a..5dc9c4650522 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif +#ifndef lm_alias +#define lm_alias(x) __va(__pa_symbol(x)) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. -- cgit v1.2.3 From cbbd86075a7e32b6e67f77d374b9bb53841ab1c7 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 11 Jan 2017 17:09:50 +0100 Subject: video: fbdev: imxfb: remove the macros for initializing the DMACR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current definitions of DMACR_HM() and DMACR_TM() are correct only for imx1, they're wrong for imx21. The macros are meant for legacy board files only, they're not applicable for boards using device tree. At the moment, there are no boards using these macros. So it should be safe to drop them rather than making them work for both imx1 and imx21, which would require a change in the platform data struct. Signed-off-by: Martin Kaiser Acked-by: Uwe Kleine-König Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/platform_data/video-imxfb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h index 18e908324549..a5c0a71ec914 100644 --- a/include/linux/platform_data/video-imxfb.h +++ b/include/linux/platform_data/video-imxfb.h @@ -47,10 +47,6 @@ #define LSCR1_GRAY2(x) (((x) & 0xf) << 4) #define LSCR1_GRAY1(x) (((x) & 0xf)) -#define DMACR_BURST (1 << 31) -#define DMACR_HM(x) (((x) & 0xf) << 16) -#define DMACR_TM(x) ((x) & 0xf) - struct imx_fb_videomode { struct fb_videomode mode; u32 pcr; -- cgit v1.2.3 From 15df6d98ec3b40775918fc6ef73d7f1c2d0cf870 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 10 Jan 2017 18:48:12 +0100 Subject: power: supply: axp20x_usb_power: fix warning on 64bit Casting of_device_get_match_data return value to int causes warning on 64bit architectures. ../drivers/power/supply/axp20x_usb_power.c: In function 'axp20x_usb_power_probe': ../drivers/power/supply/axp20x_usb_power.c:297:21: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Fixes: 0dcc70ca8644 ("power: supply: axp20x_usb_power: use of_device_id data field instead of device_is_compatible") Signed-off-by: Michal Suchanek Signed-off-by: Sebastian Reichel --- include/linux/mfd/axp20x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 812806d6319b..f848ee86a339 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -13,7 +13,7 @@ #include -enum { +enum axp20x_variants { AXP152_ID = 0, AXP202_ID, AXP209_ID, -- cgit v1.2.3 From 818e3012c2eac4885bf7278c5144a14186d742d2 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:38 +1100 Subject: power: supply: bq27xxx: rename BQ27500 allow for deprecation in future. The BQ2750X definition exists only to satisfy backwards compatibility. Signed-off-by: Chris Lapa Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index bed9557b69e7..4c904d00564c 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -4,7 +4,7 @@ enum bq27xxx_chip { BQ27000 = 1, /* bq27000, bq27200 */ BQ27010, /* bq27010, bq27210 */ - BQ27500, /* bq27500 */ + BQ2750X, /* bq27500 deprecated alias */ BQ27510, /* bq27510, bq27520 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ -- cgit v1.2.3 From 6da6e4bdd383d8efdf50da6d0933a16ad3a590f6 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:39 +1100 Subject: power: supply: bq27xxx: rename BQ27510 allow for deprecation in future. The BQ2751X definition exists only to satisfy backwards compatibility. Signed-off-by: Chris Lapa Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 4c904d00564c..651f265837a4 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -5,7 +5,7 @@ enum bq27xxx_chip { BQ27000 = 1, /* bq27000, bq27200 */ BQ27010, /* bq27010, bq27210 */ BQ2750X, /* bq27500 deprecated alias */ - BQ27510, /* bq27510, bq27520 */ + BQ2751X, /* bq27510, bq27520 deprecated alias */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 32833635b0fe9bf71bbf867d1c3abfb5b006bf29 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:40 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27500/1 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27500 chip definition to specifically match the bq27500/1 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 651f265837a4..76b623d7a9a4 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -6,6 +6,7 @@ enum bq27xxx_chip { BQ27010, /* bq27010, bq27210 */ BQ2750X, /* bq27500 deprecated alias */ BQ2751X, /* bq27510, bq27520 deprecated alias */ + BQ27500, /* bq27500/1 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From bd28177f3ec8367fbb3c56cfcf1c1a46e3fe240a Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:41 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27510-g1 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27510G1 chip definition to specifically match the bq27510-G1 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 76b623d7a9a4..3af0815426c9 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -7,6 +7,7 @@ enum bq27xxx_chip { BQ2750X, /* bq27500 deprecated alias */ BQ2751X, /* bq27510, bq27520 deprecated alias */ BQ27500, /* bq27500/1 */ + BQ27510G1, /* bq27510G1 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 698a2bf5fc31d85d428a2ae495775b61381a495e Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:42 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27510-g2 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27510G2 chip definition to specifically match the bq27510-G2 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 3af0815426c9..79772ca231b1 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -8,6 +8,7 @@ enum bq27xxx_chip { BQ2751X, /* bq27510, bq27520 deprecated alias */ BQ27500, /* bq27500/1 */ BQ27510G1, /* bq27510G1 */ + BQ27510G2, /* bq27510G2 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 71375aa7d6a7392d4968f6a562b437cb4958f956 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:43 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27510-g3 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27510G3 chip definition to specifically match the bq27510-G3 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Tested-by: Chris Lapa Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 79772ca231b1..c45ce71a6158 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -9,6 +9,7 @@ enum bq27xxx_chip { BQ27500, /* bq27500/1 */ BQ27510G1, /* bq27510G1 */ BQ27510G2, /* bq27510G2 */ + BQ27510G3, /* bq27510G3 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 68f2a813eb25bd0ef72453aeef9b1ce156157d14 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:44 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27520-g1 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27520G1 chip definition to specifically match the bq27520-G1 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index c45ce71a6158..eddd96936875 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -10,6 +10,7 @@ enum bq27xxx_chip { BQ27510G1, /* bq27510G1 */ BQ27510G2, /* bq27510G2 */ BQ27510G3, /* bq27510G3 */ + BQ27520G1, /* bq27520G1 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From a5deb9a93040a4a221ef8a67c88ecd72cd1f3625 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:45 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27520-g2 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27520G2 chip definition to specifically match the bq27520-G2 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index eddd96936875..a3fc34a4ef72 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -11,6 +11,7 @@ enum bq27xxx_chip { BQ27510G2, /* bq27510G2 */ BQ27510G3, /* bq27510G3 */ BQ27520G1, /* bq27520G1 */ + BQ27520G2, /* bq27520G2 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 825e915ba2e811b91a034ac6290cd172387c5447 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:46 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27520-g3 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27520G3 chip definition to specifically match the bq27520-G3 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index a3fc34a4ef72..5c12717d0f75 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -12,6 +12,7 @@ enum bq27xxx_chip { BQ27510G3, /* bq27510G3 */ BQ27520G1, /* bq27520G1 */ BQ27520G2, /* bq27520G2 */ + BQ27520G3, /* bq27520G3 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 8835cae5f2abd7f7a3143afe357f416aff5517a4 Mon Sep 17 00:00:00 2001 From: Chris Lapa Date: Wed, 11 Jan 2017 12:44:47 +1100 Subject: power: supply: bq27xxx: adds specific support for bq27520-g4 revision. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the BQ27520G4 chip definition to specifically match the bq27520-G4 functionality as described in the datasheet. Signed-off-by: Chris Lapa Acked-by: Pali Rohár Reviewed-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 5c12717d0f75..b312bcef53da 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -13,6 +13,7 @@ enum bq27xxx_chip { BQ27520G1, /* bq27520G1 */ BQ27520G2, /* bq27520G2 */ BQ27520G3, /* bq27520G3 */ + BQ27520G4, /* bq27520G4 */ BQ27530, /* bq27530, bq27531 */ BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ BQ27545, /* bq27545 */ -- cgit v1.2.3 From 729204ef49ec00b788ce23deb9eb922a5769f55d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 17 Dec 2016 18:49:09 +0800 Subject: block: relax check on sg gap If the last bvec of the 1st bio and the 1st bvec of the next bio are physically contigious, and the latter can be merged to last segment of the 1st bio, we should think they don't violate sg gap(or virt boundary) limit. Both Vitaly and Dexuan reported lots of unmergeable small bios are observed when running mkfs on Hyper-V virtual storage, and performance becomes quite low. This patch fixes that performance issue. The same issue should exist on NVMe, since it sets virt boundary too. Reported-by: Vitaly Kuznetsov Reported-by: Dexuan Cui Tested-by: Dexuan Cui Cc: Keith Busch Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83695641bd5e..b20da8dfa7ec 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1607,6 +1607,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } +/* + * Check if the two bvecs from two bios can be merged to one segment. + * If yes, no need to check gap between the two bios since the 1st bio + * and the 1st bvec in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + return false; + if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { @@ -1616,7 +1635,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, bio_get_last_bvec(prev, &pb); bio_get_first_bvec(next, &nb); - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + if (!bios_segs_mergeable(q, prev, &pb, &nb)) + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); } return false; -- cgit v1.2.3 From f8a5b12247fe18f7fed801ad262a7ab190e1f848 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Dec 2016 09:24:51 -0700 Subject: blk-mq: make mq_ops a const pointer We never change it, make that clear. Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche --- include/linux/blk-mq.h | 2 +- include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4a2ab5d99ff7..afc81d77e471 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -60,7 +60,7 @@ struct blk_mq_hw_ctx { struct blk_mq_tag_set { unsigned int *mq_map; - struct blk_mq_ops *ops; + const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b20da8dfa7ec..2e99d659b0f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -407,7 +407,7 @@ struct request_queue { dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; - struct blk_mq_ops *mq_ops; + const struct blk_mq_ops *mq_ops; unsigned int *mq_map; -- cgit v1.2.3 From 607904c357c61adf20b8fd18af765e501d61a385 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 9 Jan 2017 10:26:52 -0500 Subject: locking/spinlocks: Remove the unused spin_lock_bh_nested() API The spin_lock_bh_nested() API is defined but is not used anywhere in the kernel. So all spin_lock_bh_nested() and related APIs are now removed. Signed-off-by: Waiman Long Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1483975612-16447-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 -------- include/linux/spinlock_api_smp.h | 2 -- include/linux/spinlock_api_up.h | 1 - 3 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 47dd0cebd204..59248dcc6ef3 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -180,8 +180,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) -# define raw_spin_lock_bh_nested(lock, subclass) \ - _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -197,7 +195,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) -# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -317,11 +314,6 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) -#define spin_lock_bh_nested(lock, subclass) \ -do { \ - raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ -} while (0) - #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 5344268e6e62..42dfab89e740 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,8 +22,6 @@ int in_lock_functions(unsigned long addr); void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) - __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d3afef9d8dbe..d0d188861ad6 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,7 +57,6 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) -#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) -- cgit v1.2.3 From 6c0b2e833f1439ebcbd7258b655623c1aef23ffb Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 11 Jan 2017 16:32:17 +0200 Subject: soc: qcom: smem_state: Fix include for ERR_PTR() The correct include file for getting errno constants and ERR_PTR() is linux/err.h, rather than linux/errno.h, so fix the include. Fixes: e8b123e60084 ("soc: qcom: smem_state: Add stubs for disabled smem_state") Acked-by: Andy Gross Signed-off-by: Bjorn Andersson Signed-off-by: Kalle Valo --- include/linux/soc/qcom/smem_state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smem_state.h b/include/linux/soc/qcom/smem_state.h index 7b88697929e9..b8478ee7a71f 100644 --- a/include/linux/soc/qcom/smem_state.h +++ b/include/linux/soc/qcom/smem_state.h @@ -1,7 +1,7 @@ #ifndef __QCOM_SMEM_STATE__ #define __QCOM_SMEM_STATE__ -#include +#include struct device_node; struct qcom_smem_state; -- cgit v1.2.3 From 732dbf3a6104a3abfcfcd066dcaf89e5054ce009 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 22 Dec 2016 08:31:34 +0100 Subject: serial: do not accept sysrq characters via serial port many embedded boards have a disconnected TTL level serial which can generate some garbage that can lead to spurious false sysrq detects. Signed-off-by: John Crispin Signed-off-by: Felix Fietkau Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 5def8e830fb0..58484fb35cc8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -450,7 +450,7 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -#ifdef SUPPORT_SYSRQ +#if defined(SUPPORT_SYSRQ) && defined(CONFIG_MAGIC_SYSRQ_SERIAL) static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { -- cgit v1.2.3 From 738b35ccee1bcd7cf4af147edd76e7880533ad9f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Jan 2017 21:13:02 -0800 Subject: net: core: Make netif_wake_subqueue a wrapper netif_wake_subqueue() is duplicating the same thing that netif_tx_wake_queue() does, so make it call it directly after looking up the queue from the index. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ebd9e2c12f44..97ae0ac513ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3106,7 +3106,19 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } -void netif_wake_subqueue(struct net_device *dev, u16 queue_index); +/** + * netif_wake_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Resume individual transmit queue of a device with multiple transmit queues. + */ +static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + + netif_tx_wake_queue(txq); +} #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, -- cgit v1.2.3 From 6b8cc1d11ef75c5b9c530b3d0d148f3c2dd25f93 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 12 Jan 2017 11:51:32 +0100 Subject: bpf: pass original insn directly to convert_ctx_access Currently, when calling convert_ctx_access() callback for the various program types, we pass in insn->dst_reg, insn->src_reg, insn->off from the original instruction. This information is needed to rewrite the instruction that is based on the user ctx structure into a kernel representation for the ctx. As we'd like to allow access size beyond just BPF_W, we'd need also insn->code for that in order to decode the original access size. Given that, lets just pass insn directly to the convert_ctx_access() callback and work on that to not clutter the callback with even more arguments we need to pass when everything is already contained in insn. So lets go through that once, no functional change. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 94ea8d2383e6..f8c3560b01db 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -161,9 +161,10 @@ struct bpf_verifier_ops { enum bpf_reg_type *reg_type); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); - u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn, struct bpf_prog *prog); + u32 (*convert_ctx_access)(enum bpf_access_type type, + const struct bpf_insn *src, + struct bpf_insn *dst, + struct bpf_prog *prog); }; struct bpf_prog_type_list { -- cgit v1.2.3 From 3a2f5a59a695a73e0cde9a61e0feae5fa730e936 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 10 Jan 2017 12:28:32 -0500 Subject: security,selinux,smack: kill security_task_wait hook As reported by yangshukui, a permission denial from security_task_wait() can lead to a soft lockup in zap_pid_ns_processes() since it only expects sys_wait4() to return 0 or -ECHILD. Further, security_task_wait() can in general lead to zombies; in the absence of some way to automatically reparent a child process upon a denial, the hook is not useful. Remove the security hook and its implementations in SELinux and Smack. Smack already removed its check from its hook. Reported-by: yangshukui Signed-off-by: Stephen Smalley Acked-by: Casey Schaufler Acked-by: Oleg Nesterov Signed-off-by: Paul Moore --- include/linux/lsm_hooks.h | 7 ------- include/linux/security.h | 6 ------ 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0dde95900196..6fe7a5cb0be1 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -666,11 +666,6 @@ * @sig contains the signal value. * @secid contains the sid of the process where the signal originated * Return 0 if permission is granted. - * @task_wait: - * Check permission before allowing a process to reap a child process @p - * and collect its status information. - * @p contains the task_struct for process. - * Return 0 if permission is granted. * @task_prctl: * Check permission before performing a process control operation on the * current process. @@ -1507,7 +1502,6 @@ union security_list_options { int (*task_movememory)(struct task_struct *p); int (*task_kill)(struct task_struct *p, struct siginfo *info, int sig, u32 secid); - int (*task_wait)(struct task_struct *p); int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void (*task_to_inode)(struct task_struct *p, struct inode *inode); @@ -1767,7 +1761,6 @@ struct security_hook_heads { struct list_head task_getscheduler; struct list_head task_movememory; struct list_head task_kill; - struct list_head task_wait; struct list_head task_prctl; struct list_head task_to_inode; struct list_head ipc_permission; diff --git a/include/linux/security.h b/include/linux/security.h index f4ebac117fa6..d3868f2ebada 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -332,7 +332,6 @@ int security_task_getscheduler(struct task_struct *p); int security_task_movememory(struct task_struct *p); int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); -int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); @@ -980,11 +979,6 @@ static inline int security_task_kill(struct task_struct *p, return 0; } -static inline int security_task_wait(struct task_struct *p) -{ - return 0; -} - static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, -- cgit v1.2.3 From d8c34b949d8c9f61e099e00f22770e400adf2b76 Mon Sep 17 00:00:00 2001 From: Gideon Israel Dsouza Date: Sat, 31 Dec 2016 21:26:23 +0530 Subject: crypto: Replaced gcc specific attributes with macros from compiler.h Continuing from this commit: 52f5684c8e1e ("kernel: use macros from compiler.h instead of __attribute__((...))") I submitted 4 total patches. They are part of task I've taken up to increase compiler portability in the kernel. I've cleaned up the subsystems under /kernel /mm /block and /security, this patch targets /crypto. There is which provides macros for various gcc specific constructs. Eg: __weak for __attribute__((weak)). I've cleaned all instances of gcc specific attributes with the right macros for the crypto subsystem. I had to make one additional change into compiler-gcc.h for the case when one wants to use this: __attribute__((aligned) and not specify an alignment factor. From the gcc docs, this will result in the largest alignment for that data type on the target machine so I've named the macro __aligned_largest. Please advise if another name is more appropriate. Signed-off-by: Gideon Israel Dsouza Signed-off-by: Herbert Xu --- include/linux/compiler-gcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..fddd1a5eb322 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -116,6 +116,7 @@ */ #define __pure __attribute__((pure)) #define __aligned(x) __attribute__((aligned(x))) +#define __aligned_largest __attribute__((aligned)) #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) #define __attribute_const__ __attribute__((__const__)) -- cgit v1.2.3 From 2cf8e2dfdf88363476f23bc600745250b94dbbed Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 12 Jan 2017 11:17:39 +0000 Subject: regmap: Fixup the kernel-doc comments on functions/structures Most of the kernel-doc comments in regmap don't actually generate correctly. This patch fixes up a few common issues, corrects some typos and adds some missing argument descriptions. The most common issues being using a : after the function name which causes the short description to not render correctly and not separating the long and short descriptions of the function. There are quite a few instances of arguments not being described or given the wrong name as well. This patch doesn't fixup functions/structures that are currently missing descriptions. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/regmap.h | 115 ++++++++++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 9adc7b21903d..38a85d50fefd 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -39,12 +39,13 @@ enum regcache_type { }; /** - * Default value for a register. We use an array of structs rather - * than a simple array as many modern devices have very sparse - * register maps. + * struct reg_default - Default value for a register. * * @reg: Register address. * @def: Register default value. + * + * We use an array of structs rather than a simple array as many modern devices + * have very sparse register maps. */ struct reg_default { unsigned int reg; @@ -52,12 +53,14 @@ struct reg_default { }; /** - * Register/value pairs for sequences of writes with an optional delay in - * microseconds to be applied after each write. + * struct reg_sequence - An individual write from a sequence of writes. * * @reg: Register address. * @def: Register value. * @delay_us: Delay to be applied after the register write in microseconds + * + * Register/value pairs for sequences of writes with an optional delay in + * microseconds to be applied after each write. */ struct reg_sequence { unsigned int reg; @@ -97,6 +100,7 @@ struct reg_sequence { /** * regmap_read_poll_timeout - Poll until a condition is met or a timeout occurs + * * @map: Regmap to read from * @addr: Address to poll * @val: Unsigned integer variable to read the value into @@ -145,8 +149,8 @@ enum regmap_endian { }; /** - * A register range, used for access related checks - * (readable/writeable/volatile/precious checks) + * struct regmap_range - A register range, used for access related checks + * (readable/writeable/volatile/precious checks) * * @range_min: address of first register * @range_max: address of last register @@ -158,16 +162,18 @@ struct regmap_range { #define regmap_reg_range(low, high) { .range_min = low, .range_max = high, } -/* - * A table of ranges including some yes ranges and some no ranges. - * If a register belongs to a no_range, the corresponding check function - * will return false. If a register belongs to a yes range, the corresponding - * check function will return true. "no_ranges" are searched first. +/** + * struct regmap_access_table - A table of register ranges for access checks * * @yes_ranges : pointer to an array of regmap ranges used as "yes ranges" * @n_yes_ranges: size of the above array * @no_ranges: pointer to an array of regmap ranges used as "no ranges" * @n_no_ranges: size of the above array + * + * A table of ranges including some yes ranges and some no ranges. + * If a register belongs to a no_range, the corresponding check function + * will return false. If a register belongs to a yes range, the corresponding + * check function will return true. "no_ranges" are searched first. */ struct regmap_access_table { const struct regmap_range *yes_ranges; @@ -180,7 +186,7 @@ typedef void (*regmap_lock)(void *); typedef void (*regmap_unlock)(void *); /** - * Configuration for the register map of a device. + * struct regmap_config - Configuration for the register map of a device. * * @name: Optional name of the regmap. Useful when a device has multiple * register regions. @@ -313,22 +319,24 @@ struct regmap_config { }; /** - * Configuration for indirectly accessed or paged registers. - * Registers, mapped to this virtual range, are accessed in two steps: - * 1. page selector register update; - * 2. access through data window registers. + * struct regmap_range_cfg - Configuration for indirectly accessed or paged + * registers. * * @name: Descriptive name for diagnostics * * @range_min: Address of the lowest register address in virtual range. * @range_max: Address of the highest register in virtual range. * - * @page_sel_reg: Register with selector field. - * @page_sel_mask: Bit shift for selector value. - * @page_sel_shift: Bit mask for selector value. + * @selector_reg: Register with selector field. + * @selector_mask: Bit shift for selector value. + * @selector_shift: Bit mask for selector value. * * @window_start: Address of first (lowest) register in data window. * @window_len: Number of registers in data window. + * + * Registers, mapped to this virtual range, are accessed in two steps: + * 1. page selector register update; + * 2. access through data window registers. */ struct regmap_range_cfg { const char *name; @@ -371,7 +379,8 @@ typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); /** - * Description of a hardware bus for the register map infrastructure. + * struct regmap_bus - Description of a hardware bus for the register map + * infrastructure. * * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. This field is ignored if custom lock/unlock @@ -384,6 +393,10 @@ typedef void (*regmap_hw_free_context)(void *context); * must serialise with respect to non-async I/O. * @reg_write: Write a single register value to the given register address. This * write operation has to complete when returning from the function. + * @reg_update_bits: Update bits operation to be used against volatile + * registers, intended for devices supporting some mechanism + * for setting clearing bits without having to + * read/modify/write. * @read: Read operation. Data is returned in the buffer used to transmit * data. * @reg_read: Read a single register value from a given register address. @@ -513,7 +526,7 @@ struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, #endif /** - * regmap_init(): Initialise register map + * regmap_init() - Initialise register map * * @dev: Device that will be interacted with * @bus: Bus-specific callbacks to use with device @@ -531,7 +544,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, const struct regmap_config *config); /** - * regmap_init_i2c(): Initialise register map + * regmap_init_i2c() - Initialise register map * * @i2c: Device that will be interacted with * @config: Configuration for register map @@ -544,9 +557,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, i2c, config) /** - * regmap_init_spi(): Initialise register map + * regmap_init_spi() - Initialise register map * - * @spi: Device that will be interacted with + * @dev: Device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to @@ -557,8 +570,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, dev, config) /** - * regmap_init_spmi_base(): Create regmap for the Base register space - * @sdev: SPMI device that will be interacted with + * regmap_init_spmi_base() - Create regmap for the Base register space + * + * @dev: SPMI device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to @@ -569,8 +583,9 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, dev, config) /** - * regmap_init_spmi_ext(): Create regmap for Ext register space - * @sdev: Device that will be interacted with + * regmap_init_spmi_ext() - Create regmap for Ext register space + * + * @dev: Device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer to @@ -581,7 +596,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, dev, config) /** - * regmap_init_mmio_clk(): Initialise register map with register clock + * regmap_init_mmio_clk() - Initialise register map with register clock * * @dev: Device that will be interacted with * @clk_id: register clock consumer ID @@ -596,7 +611,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, dev, clk_id, regs, config) /** - * regmap_init_mmio(): Initialise register map + * regmap_init_mmio() - Initialise register map * * @dev: Device that will be interacted with * @regs: Pointer to memory-mapped IO region @@ -609,7 +624,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, regmap_init_mmio_clk(dev, NULL, regs, config) /** - * regmap_init_ac97(): Initialise AC'97 register map + * regmap_init_ac97() - Initialise AC'97 register map * * @ac97: Device that will be interacted with * @config: Configuration for register map @@ -623,7 +638,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); /** - * devm_regmap_init(): Initialise managed register map + * devm_regmap_init() - Initialise managed register map * * @dev: Device that will be interacted with * @bus: Bus-specific callbacks to use with device @@ -640,7 +655,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, bus, bus_context, config) /** - * devm_regmap_init_i2c(): Initialise managed register map + * devm_regmap_init_i2c() - Initialise managed register map * * @i2c: Device that will be interacted with * @config: Configuration for register map @@ -654,9 +669,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); i2c, config) /** - * devm_regmap_init_spi(): Initialise register map + * devm_regmap_init_spi() - Initialise register map * - * @spi: Device that will be interacted with + * @dev: Device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer @@ -668,8 +683,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, config) /** - * devm_regmap_init_spmi_base(): Create managed regmap for Base register space - * @sdev: SPMI device that will be interacted with + * devm_regmap_init_spmi_base() - Create managed regmap for Base register space + * + * @dev: SPMI device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer @@ -681,8 +697,9 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, config) /** - * devm_regmap_init_spmi_ext(): Create managed regmap for Ext register space - * @sdev: SPMI device that will be interacted with + * devm_regmap_init_spmi_ext() - Create managed regmap for Ext register space + * + * @dev: SPMI device that will be interacted with * @config: Configuration for register map * * The return value will be an ERR_PTR() on error or a valid pointer @@ -694,7 +711,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, config) /** - * devm_regmap_init_mmio_clk(): Initialise managed register map with clock + * devm_regmap_init_mmio_clk() - Initialise managed register map with clock * * @dev: Device that will be interacted with * @clk_id: register clock consumer ID @@ -710,7 +727,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); dev, clk_id, regs, config) /** - * devm_regmap_init_mmio(): Initialise managed register map + * devm_regmap_init_mmio() - Initialise managed register map * * @dev: Device that will be interacted with * @regs: Pointer to memory-mapped IO region @@ -724,7 +741,7 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); devm_regmap_init_mmio_clk(dev, NULL, regs, config) /** - * devm_regmap_init_ac97(): Initialise AC'97 register map + * devm_regmap_init_ac97() - Initialise AC'97 register map * * @ac97: Device that will be interacted with * @config: Configuration for register map @@ -799,7 +816,7 @@ bool regmap_reg_in_ranges(unsigned int reg, unsigned int nranges); /** - * Description of an register field + * struct reg_field - Description of an register field * * @reg: Offset of the register within the regmap bank * @lsb: lsb of the register field. @@ -840,7 +857,7 @@ int regmap_fields_update_bits_base(struct regmap_field *field, unsigned int id, bool *change, bool async, bool force); /** - * Description of an IRQ for the generic regmap irq_chip. + * struct regmap_irq - Description of an IRQ for the generic regmap irq_chip. * * @reg_offset: Offset of the status/mask register within the bank * @mask: Mask used to flag/control the register. @@ -860,9 +877,7 @@ struct regmap_irq { [_irq] = { .reg_offset = (_off), .mask = (_mask) } /** - * Description of a generic regmap irq_chip. This is not intended to - * handle every possible interrupt controller, but it should handle a - * substantial proportion of those that are found in the wild. + * struct regmap_irq_chip - Description of a generic regmap irq_chip. * * @name: Descriptive name for IRQ controller. * @@ -896,6 +911,10 @@ struct regmap_irq { * after handling the interrupts in regmap_irq_handler(). * @irq_drv_data: Driver specific IRQ data which is passed as parameter when * driver specific pre/post interrupt handler is called. + * + * This is not intended to handle every possible interrupt controller, but + * it should handle a substantial proportion of those that are found in the + * wild. */ struct regmap_irq_chip { const char *name; -- cgit v1.2.3 From 8fb472c09b9df478a062eacc7841448e40fc3c17 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 12 Jan 2017 15:53:33 +0100 Subject: ipmr: improve hash scalability Recently we started using ipmr with thousands of entries and easily hit soft lockups on smaller devices. The reason is that the hash function uses the high order bits from the src and dst, but those don't change in many common cases, also the hash table is only 64 elements so with thousands it doesn't scale at all. This patch migrates the hash table to rhashtable, and in particular the rhl interface which allows for duplicate elements to be chained because of the MFC_PROXY support (*,G; *,*,oif cases) which allows for multiple duplicate entries to be added with different interfaces (IMO wrong, but it's been in for a long time). And here are some results from tests I've run in a VM: mr_table size (default, allocated for all namespaces): Before After 49304 bytes 2400 bytes Add 65000 routes (the diff is much larger on smaller devices): Before After 1m42s 58s Forwarding 256 byte packets with 65000 routes (test done in a VM): Before After 3 Mbps / ~1465 pps 122 Mbps / ~59000 pps As a bonus we no longer see the soft lockups on smaller devices which showed up even with 2000 entries before. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 57 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index f019b62f27b5..d7f63339ef0b 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -60,7 +61,6 @@ struct vif_device { #define VIFF_STATIC 0x8000 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) -#define MFC_LINES 64 struct mr_table { struct list_head list; @@ -69,8 +69,9 @@ struct mr_table { struct sock __rcu *mroute_sk; struct timer_list ipmr_expire_timer; struct list_head mfc_unres_queue; - struct list_head mfc_cache_array[MFC_LINES]; struct vif_device vif_table[MAXVIFS]; + struct rhltable mfc_hash; + struct list_head mfc_cache_list; int maxvif; atomic_t cache_resolve_queue_len; bool mroute_do_assert; @@ -85,17 +86,48 @@ enum { MFC_STATIC = BIT(0), }; +struct mfc_cache_cmp_arg { + __be32 mfc_mcastgrp; + __be32 mfc_origin; +}; + +/** + * struct mfc_cache - multicast routing entries + * @mnode: rhashtable list + * @mfc_mcastgrp: destination multicast group address + * @mfc_origin: source address + * @cmparg: used for rhashtable comparisons + * @mfc_parent: source interface (iif) + * @mfc_flags: entry flags + * @expires: unresolved entry expire time + * @unresolved: unresolved cached skbs + * @last_assert: time of last assert + * @minvif: minimum VIF id + * @maxvif: maximum VIF id + * @bytes: bytes that have passed for this entry + * @pkt: packets that have passed for this entry + * @wrong_if: number of wrong source interface hits + * @lastuse: time of last use of the group (traffic or update) + * @ttls: OIF TTL threshold array + * @list: global entry list + * @rcu: used for entry destruction + */ struct mfc_cache { - struct list_head list; - __be32 mfc_mcastgrp; /* Group the entry belongs to */ - __be32 mfc_origin; /* Source of packet */ - vifi_t mfc_parent; /* Source interface */ - int mfc_flags; /* Flags on line */ + struct rhlist_head mnode; + union { + struct { + __be32 mfc_mcastgrp; + __be32 mfc_origin; + }; + struct mfc_cache_cmp_arg cmparg; + }; + vifi_t mfc_parent; + int mfc_flags; union { struct { unsigned long expires; - struct sk_buff_head unresolved; /* Unresolved buffers */ + struct sk_buff_head unresolved; } unres; struct { unsigned long last_assert; @@ -105,18 +137,13 @@ struct mfc_cache { unsigned long pkt; unsigned long wrong_if; unsigned long lastuse; - unsigned char ttls[MAXVIFS]; /* TTL thresholds */ + unsigned char ttls[MAXVIFS]; } res; } mfc_un; + struct list_head list; struct rcu_head rcu; }; -#ifdef __BIG_ENDIAN -#define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) -#else -#define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) -#endif - struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, -- cgit v1.2.3 From 950b0d91dc108f54bccca5a2f75bb46f2df63d29 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 11 Jan 2017 14:13:34 -0800 Subject: pinctrl: core: Fix regression caused by delayed work for hogs Commit df61b366af26 ("pinctrl: core: Use delayed work for hogs") caused a regression at least with sh-pfc that is also a GPIO controller as noted by Geert Uytterhoeven . As the original pinctrl_register() has issues calling pin controller driver functions early before the controller has finished registering, we can't just revert commit df61b366af26. That would break the drivers using GENERIC_PINCTRL_GROUPS or GENERIC_PINMUX_FUNCTIONS. So let's fix the issue with the following steps as a single patch: 1. Revert the late_init parts of commit df61b366af26. The late_init clearly won't work and we have to just give up on fixing pinctrl_register() for GENERIC_PINCTRL_GROUPS and GENERIC_PINMUX_FUNCTIONS. 2. Split pinctrl_register() into two parts By splitting pinctrl_register() into pinctrl_init_controller() and pinctrl_create_and_start() we have better control over when it's safe to call pinctrl_create(). 3. Introduce a new pinctrl_register_and_init() function As suggested by Linus Walleij , we can just introduce a new function for the controllers that need pinctrl_create() called later. 4. Convert the four known problem cases to use new function Let's convert pinctrl-imx, pinctrl-single, sh-pfc and ti-iodelay to use the new function to fix the issues. The rest of the drivers can be converted later. Let's also update Documentation/pinctrl.txt accordingly because of the known issues with pinctrl_register(). Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs") Reported-by: Geert Uytterhoeven Cc: Gary Bisson Signed-off-by: Tony Lindgren Tested-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index a42e57da270d..8ce2d87a238b 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -141,12 +141,27 @@ struct pinctrl_desc { }; /* External interface to pin controller */ + +extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, + struct device *dev, void *driver_data, + struct pinctrl_dev **pctldev); + +/* Please use pinctrl_register_and_init() instead */ extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); + extern void pinctrl_unregister(struct pinctrl_dev *pctldev); + +extern int devm_pinctrl_register_and_init(struct device *dev, + struct pinctrl_desc *pctldesc, + void *driver_data, + struct pinctrl_dev **pctldev); + +/* Please use devm_pinctrl_register_and_init() instead */ extern struct pinctrl_dev *devm_pinctrl_register(struct device *dev, struct pinctrl_desc *pctldesc, void *driver_data); + extern void devm_pinctrl_unregister(struct device *dev, struct pinctrl_dev *pctldev); -- cgit v1.2.3 From 4fe0395550aeb6709ea5332f46de3644aef7d328 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2017 21:37:40 +0100 Subject: PCI/MSI: Remove pci_enable_msi_{exact,range}() All multi-MSI allocations are now done through pci_irq_alloc_vectors(), so remove the old pci_enable_msi_range() and pci_enable_msi_exact() interfaces. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e2d1a124216a..2159376dc673 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1309,14 +1309,7 @@ void pci_msix_shutdown(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); int pci_msi_enabled(void); -int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec); -static inline int pci_enable_msi_exact(struct pci_dev *dev, int nvec) -{ - int rc = pci_enable_msi_range(dev, nvec, nvec); - if (rc < 0) - return rc; - return 0; -} +int pci_enable_msi(struct pci_dev *dev); int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); static inline int pci_enable_msix_exact(struct pci_dev *dev, @@ -1347,10 +1340,7 @@ static inline void pci_msix_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } static inline int pci_msi_enabled(void) { return 0; } -static inline int pci_enable_msi_range(struct pci_dev *dev, int minvec, - int maxvec) -{ return -ENOSYS; } -static inline int pci_enable_msi_exact(struct pci_dev *dev, int nvec) +static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) @@ -1426,8 +1416,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif -#define pci_enable_msi(pdev) pci_enable_msi_exact(pdev, 1) - #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit v1.2.3 From deed7be78f512d003c6290da0a781479b31b3d74 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:32 -0800 Subject: tcp: record most recent RTT in RACK loss detection Record the most recent RTT in RACK. It is often identical to the "ca_rtt_us" values in tcp_clean_rtx_queue. But when the packet has been retransmitted, RACK choses to believe the ACK is for the (latest) retransmitted packet if the RTT is over minimum RTT. This requires passing the arrival time of the most recent ACK to RACK routines. The timestamp is now recorded in the "ack_time" in tcp_sacktag_state during the ACK processing. This patch does not change the RACK algorithm itself. It only adds the RTT variable to prepare the next main patch. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fc5848dad7a4..1255c592719c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -207,6 +207,7 @@ struct tcp_sock { /* Information of the most recently (s)acked skb */ struct tcp_rack { struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u32 rtt_us; /* Associated RTT */ u8 advanced; /* mstamp advanced since last lost marking */ u8 reord; /* reordering detected */ } rack; -- cgit v1.2.3 From 1d0833df594390876647c54c2c88069d29059665 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:34 -0800 Subject: tcp: use sequence to break TS ties for RACK loss detection The packets inside a jumbo skb (e.g., TSO) share the same skb timestamp, even though they are sent sequentially on the wire. Since RACK is based on time, it can not detect some packets inside the same skb are lost. However, we can leverage the packet sequence numbers as extended timestamps to detect losses. Therefore, when RACK timestamp is identical to skb's timestamp (i.e., one of the packets of the skb is acked or sacked), we use the sequence numbers of the acked and unacked packets to break ties. We can use the same sequence logic to advance RACK xmit time as well to detect more losses and avoid timeout. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1255c592719c..970d5f00589f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -208,6 +208,7 @@ struct tcp_sock { struct tcp_rack { struct skb_mstamp mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ + u32 end_seq; /* Ending TCP sequence of the skb */ u8 advanced; /* mstamp advanced since last lost marking */ u8 reord; /* reordering detected */ } rack; -- cgit v1.2.3 From 840a3cbe89694fad75578856976f180e852e69aa Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:38 -0800 Subject: tcp: remove forward retransmit feature Forward retransmit is an esoteric feature in RFC3517 (condition(3) in the NextSeg()). Basically if a packet is not considered lost by the current criteria (# of dupacks etc), but the congestion window has room for more packets, then retransmit this packet. However it actually conflicts with the rest of recovery design. For example, when reordering is detected we want to be conservative in retransmitting packets but forward-retransmit feature would break that to force more retransmission. Also the implementation is fairly complicated inside the retransmission logic inducing extra iterations in the write queue. With RACK losses are being detected timely and this heuristic is no longer necessary. There this patch removes the feature. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 970d5f00589f..8e5f4c15d0e5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -307,7 +307,6 @@ struct tcp_sock { */ int lost_cnt_hint; - u32 retransmit_high; /* L-bits may be on up to this seqno */ u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ -- cgit v1.2.3 From bec41a11dd3dc8c54f766b4f494140ca92ba7c10 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:39 -0800 Subject: tcp: remove early retransmit This patch removes the support of RFC5827 early retransmit (i.e., fast recovery on small inflight with <3 dupacks) because it is subsumed by the new RACK loss detection. More specifically when RACK receives DUPACKs, it'll arm a reordering timer to start fast recovery after a quarter of (min)RTT, hence it covers the early retransmit except RACK does not limit itself to specific inflight or dupack numbers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8e5f4c15d0e5..4733368f953a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,8 +224,7 @@ struct tcp_sock { repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; - u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - syn_data:1, /* SYN includes data */ + u8 syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ -- cgit v1.2.3 From 4a7f6009441144783e5925551c72e3f2e1b0839b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 12 Jan 2017 22:11:41 -0800 Subject: tcp: remove thin_dupack feature Thin stream DUPACK is to start fast recovery on only one DUPACK provided the connection is a thin stream (i.e., low inflight). But this older feature is now subsumed with RACK. If a connection receives only a single DUPACK, RACK would arm a reordering timer and soon starts fast recovery instead of timeout if no further ACKs are received. The socket option (THIN_DUPACK) is kept as a nop for compatibility. Note that this patch does not change another thin-stream feature which enables linear RTO. Although it might be good to generalize that in the future (i.e., linear RTO for the first say 3 retries). Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4733368f953a..6c22332afb75 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -220,7 +220,7 @@ struct tcp_sock { unused:5; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ - thin_dupack : 1,/* Fast retransmit on first dupack */ + unused1 : 1, repair : 1, frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; -- cgit v1.2.3 From a665ece8b471de45bc19af05d52a1eaa5bc06dca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2017 13:23:31 +0200 Subject: x86/platform/intel: Remove PMIC GPIO block support Moorestown support was removed by commit: 1a8359e411eb ("x86/mid: Remove Intel Moorestown") Remove this leftover. Signed-off-by: Andy Shevchenko Cc: Darren Hart Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20170112112331.93236-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/intel_pmic_gpio.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 include/linux/intel_pmic_gpio.h (limited to 'include/linux') diff --git a/include/linux/intel_pmic_gpio.h b/include/linux/intel_pmic_gpio.h deleted file mode 100644 index 920109a29191..000000000000 --- a/include/linux/intel_pmic_gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LINUX_INTEL_PMIC_H -#define LINUX_INTEL_PMIC_H - -struct intel_pmic_gpio_platform_data { - /* the first IRQ of the chip */ - unsigned irq_base; - /* number assigned to the first GPIO */ - unsigned gpio_base; - /* sram address for gpiointr register, the langwell chip will map - * the PMIC spi GPIO expander's GPIOINTR register in sram. - */ - unsigned gpiointr; -}; - -#endif -- cgit v1.2.3 From 5b485629ba0d5d027880769ff467c587b24b4bde Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 8 Jan 2017 23:58:09 +0900 Subject: kprobes, extable: Identify kprobes trampolines as kernel text area Improve __kernel_text_address()/kernel_text_address() to return true if the given address is on a kprobe's instruction slot trampoline. This can help stacktraces to determine the address is on a text area or not. To implement this atomically in is_kprobe_*_slot(), also change the insn_cache page list to an RCU list. This changes timings a bit (it delays page freeing to the RCU garbage collection phase), but none of that is in the hot path. Note: this change can add small overhead to stack unwinders because it adds 2 additional checks to __kernel_text_address(). However, the impact should be very small, because kprobe_insn_pages list has 1 entry per 256 probes(on x86, on arm/arm64 it will be 1024 probes), and kprobe_optinsn_pages has 1 entry per 32 probes(on x86). In most use cases, the number of kprobe events may be less than 20, which means that is_kprobe_*_slot() will check just one entry. Tested-by: Josh Poimboeuf Signed-off-by: Masami Hiramatsu Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Andrey Konovalov Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/148388747896.6869.6354262871751682264.stgit@devbox [ Improved the changelog and coding style. ] Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 8f6849084248..16ddfb8b304a 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -278,9 +278,13 @@ struct kprobe_insn_cache { int nr_garbage; }; +#ifdef __ARCH_WANT_KPROBES_INSN_SLOT extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c); extern void __free_insn_slot(struct kprobe_insn_cache *c, kprobe_opcode_t *slot, int dirty); +/* sleep-less address checking routine */ +extern bool __is_insn_slot_addr(struct kprobe_insn_cache *c, + unsigned long addr); #define DEFINE_INSN_CACHE_OPS(__name) \ extern struct kprobe_insn_cache kprobe_##__name##_slots; \ @@ -294,6 +298,18 @@ static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\ { \ __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \ } \ + \ +static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ +{ \ + return __is_insn_slot_addr(&kprobe_##__name##_slots, addr); \ +} +#else /* __ARCH_WANT_KPROBES_INSN_SLOT */ +#define DEFINE_INSN_CACHE_OPS(__name) \ +static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ +{ \ + return 0; \ +} +#endif DEFINE_INSN_CACHE_OPS(insn); @@ -330,7 +346,6 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif - #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, @@ -481,6 +496,19 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifndef CONFIG_KPROBES +static inline bool is_kprobe_insn_slot(unsigned long addr) +{ + return false; +} +#endif +#ifndef CONFIG_OPTPROBES +static inline bool is_kprobe_optinsn_slot(unsigned long addr) +{ + return false; +} +#endif + #ifdef CONFIG_KPROBES /* * Blacklist ganerating macro. Specify functions which is not probed -- cgit v1.2.3 From c31cc6a5187e8b09ccee34f81728a90f80e872e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:43 +0100 Subject: sched/cputime: Allow accounting system time using cpustat index In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay cputime accounting to the tick, let's provide APIs to account system time to precise contexts: hardirq, softirq, pure system, ... Inspired-by: Martin Schwidefsky Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 00f776816aa3..14b63bb714d4 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -80,6 +80,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) extern void account_user_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); +extern void account_system_index_time(struct task_struct *, cputime_t, + enum cpu_usage_stat); extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -- cgit v1.2.3 From 1213699ab426608ff1925ab263dd6925102bb92a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:44 +0100 Subject: sched/cputime: Export account_guest_time() In order to prepare for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y to delay cputime accounting to the tick, let's allow archs to account cputime directly to gtime. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-5-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 14b63bb714d4..cfd6c0c6d4e8 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,6 +79,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) } extern void account_user_time(struct task_struct *, cputime_t); +extern void account_guest_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_system_index_time(struct task_struct *, cputime_t, enum cpu_usage_stat); -- cgit v1.2.3 From c8d7dabf8f91fadd265e6eb87afb201d14ea299b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Jan 2017 18:11:50 +0100 Subject: sched/cputime: Rename vtime_account_user() to vtime_flush() CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y used to accumulate user time and account it on ticks and context switches only through the vtime_account_user() function. Now this model has been generalized on the 3 archs for all kind of cputime (system, irq, ...) and all the cputime flushing happens under vtime_account_user(). So let's rename this function to better reflect its new role. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Martin Schwidefsky Cc: Benjamin Herrenschmidt Cc: Christian Borntraeger Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1483636310-6557-11-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 +- include/linux/vtime.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cfd6c0c6d4e8..c3e38ded2d73 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -89,7 +89,7 @@ extern void account_idle_time(cputime_t); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { - vtime_account_user(tsk); + vtime_flush(tsk); } #else extern void account_process_tick(struct task_struct *, int user); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index aa9bfea8804a..0681fe25abeb 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -58,27 +58,28 @@ static inline void vtime_task_switch(struct task_struct *prev) extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); -extern void vtime_account_user(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_user(struct task_struct *tsk) { } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_user(struct task_struct *tsk); extern void vtime_user_enter(struct task_struct *tsk); static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); } + extern void vtime_guest_enter(struct task_struct *tsk); extern void vtime_guest_exit(struct task_struct *tsk); extern void vtime_init_idle(struct task_struct *tsk, int cpu); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */ +static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } static inline void vtime_guest_enter(struct task_struct *tsk) { } @@ -93,9 +94,11 @@ static inline void vtime_account_irq_exit(struct task_struct *tsk) /* On hard|softirq exit we always account to hard|softirq cputime */ vtime_account_system(tsk); } +extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static inline void vtime_account_irq_enter(struct task_struct *tsk) { } static inline void vtime_account_irq_exit(struct task_struct *tsk) { } +static inline void vtime_flush(struct task_struct *tsk) { } #endif -- cgit v1.2.3 From 642fa448ae6b3a4e5e8737054a094173405b7643 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 3 Jan 2017 13:43:14 -0800 Subject: sched/core: Remove set_task_state() This is a nasty interface and setting the state of a foreign task must not be done. As of the following commit: be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()") ... everyone in the kernel calls set_task_state() with current, allowing the helper to be removed. However, as the comment indicates, it is still around for those archs where computing current is more expensive than using a pointer, at least in theory. An important arch that is affected is arm64, however this has been addressed now [1] and performance is up to par making no difference with either calls. Of all the callers, if any, it's the locking bits that would care most about this -- ie: we end up passing a tsk pointer to a lot of the lock slowpath, and setting ->state on that. The following numbers are based on two tests: a custom ad-hoc microbenchmark that just measures latencies (for ~65 million calls) between get_task_state() vs get_current_state(). Secondly for a higher overview, an unlink microbenchmark was used, which pounds on a single file with open, close,unlink combos with increasing thread counts (up to 4x ncpus). While the workload is quite unrealistic, it does contend a lot on the inode mutex or now rwsem. [1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com == 1. x86-64 == Avg runtime set_task_state(): 601 msecs Avg runtime set_current_state(): 552 msecs vanilla dirty Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%) Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%) Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%) Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%) Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%) Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%) Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%) Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%) Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%) Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%) Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%) Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%) Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%) == 2. ppc64le == Avg runtime set_task_state(): 938 msecs Avg runtime set_current_state: 940 msecs vanilla dirty Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%) Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%) Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%) Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%) Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%) Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%) Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%) Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%) Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%) Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%) Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%) Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%) Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%) Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%) Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%) Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%) x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching) and ppc64 (with paca) show similar improvements in the unlink microbenches. The small delta for ppc64 (2ms), does not represent the gains on the unlink runs. In the case of x86, there was a decent amount of variation in the latency runs, but always within a 20 to 50ms increase), ppc was more constant. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: mark.rutland@arm.com Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..f4f9d32f12b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -227,7 +227,7 @@ extern void proc_sched_set_task(struct task_struct *p); extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -/* Convenience macros for the sake of set_task_state */ +/* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) @@ -254,17 +254,6 @@ extern char ___assert_task_state[1 - 2*!!( #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -#define __set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - (tsk)->state = (state_value); \ - } while (0) -#define set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ - } while (0) - #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -277,20 +266,6 @@ extern char ___assert_task_state[1 - 2*!!( } while (0) #else - -/* - * @tsk had better be current, or you get to keep the pieces. - * - * The only reason is that computing current can be more expensive than - * using a pointer that's already available. - * - * Therefore, see set_current_state(). - */ -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - smp_store_mb((tsk)->state, (state_value)) - /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to -- cgit v1.2.3 From 8f95c90ceb541a38ac16fec48c05142ef1450c25 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:25 -0800 Subject: sched/wait, RCU: Introduce rcuwait machinery rcuwait provides support for (single) RCU-safe task wait/wake functionality, with the caveat that it must not be called after exit_notify(), such that we avoid racing with rcu delayed_put_task_struct callbacks, task_struct being rcu unaware in this context -- for which we similarly have task_rcu_dereference() magic, but with different return semantics, which can conflict with the wakeup side. The interfaces are quite straightforward: rcuwait_wait_event() rcuwait_wake_up() More details are in the comments, but it's perhaps worth mentioning at least, that users must provide proper serialization when waiting on a condition, and avoid corrupting a concurrent waiter. Also care must be taken between the task and the condition for when calling the wakeup -- we cannot miss wakeups. When porting users, this is for example, a given when using waitqueues in that everything is done under the q->lock. As such, it can remove sources of non preemptable unbounded work for realtime. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 include/linux/rcuwait.h (limited to 'include/linux') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h new file mode 100644 index 000000000000..0e93d56c7ab2 --- /dev/null +++ b/include/linux/rcuwait.h @@ -0,0 +1,63 @@ +#ifndef _LINUX_RCUWAIT_H_ +#define _LINUX_RCUWAIT_H_ + +#include + +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner; where it is forbidden to use + * after exit_notify(). task_struct is not properly rcu protected, + * unless dealing with rcu-aware lists, ie: find_task_by_*(). + * + * Alternatively we have task_rcu_dereference(), but the return + * semantics have different implications which would break the + * wakeup side. The only time @task is non-nil is when a user is + * blocked (or checking if it needs to) on a condition, and reset + * as soon as we know that the condition has succeeded and are + * awoken. + */ +struct rcuwait { + struct task_struct *task; +}; + +#define __RCUWAIT_INITIALIZER(name) \ + { .task = NULL, } + +static inline void rcuwait_init(struct rcuwait *w) +{ + w->task = NULL; +} + +extern void rcuwait_wake_up(struct rcuwait *w); + +/* + * The caller is responsible for locking around rcuwait_wait_event(), + * such that writes to @task are properly serialized. + */ +#define rcuwait_wait_event(w, condition) \ +({ \ + /* \ + * Complain if we are called after do_exit()/exit_notify(), \ + * as we cannot rely on the rcu critical region for the \ + * wakeup side. \ + */ \ + WARN_ON(current->exit_state); \ + \ + rcu_assign_pointer((w)->task, current); \ + for (;;) { \ + /* \ + * Implicit barrier (A) pairs with (B) in \ + * rcuwait_trywake(). \ + */ \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + \ + schedule(); \ + } \ + \ + WRITE_ONCE((w)->task, NULL); \ + __set_current_state(TASK_RUNNING); \ +}) + +#endif /* _LINUX_RCUWAIT_H_ */ -- cgit v1.2.3 From 52b94129f274937e4c25dd17b76697664a3c43c9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 11 Jan 2017 07:22:26 -0800 Subject: locking/percpu-rwsem: Replace waitqueue with rcuwait The use of any kind of wait queue is an overkill for pcpu-rwsems. While one option would be to use the less heavy simple (swait) flavor, this is still too much for what pcpu-rwsems needs. For one, we do not care about any sort of queuing in that the only (rare) time writers (and readers, for that matter) are queued is when trying to acquire the regular contended rw_sem. There cannot be any further queuing as writers are serialized by the rw_sem in the first place. Given that percpu_down_write() must not be called after exit_notify(), we can replace the bulky waitqueue with rcuwait such that a writer can wait for its turn to take the lock. As such, we can avoid the queue handling and locking overhead. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1484148146-14210-3-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/percpu-rwsem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5b2e6159b744..93664f022ecf 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -4,15 +4,15 @@ #include #include #include -#include +#include #include #include struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; - struct rw_semaphore rw_sem; - wait_queue_head_t writer; + struct rw_semaphore rw_sem; /* slowpath */ + struct rcuwait writer; /* blocked writer */ int readers_block; }; @@ -22,7 +22,7 @@ static struct percpu_rw_semaphore name = { \ .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ .read_count = &__percpu_rwsem_rc_##name, \ .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + .writer = __RCUWAIT_INITIALIZER(name.writer), \ } extern int __percpu_down_read(struct percpu_rw_semaphore *, int); -- cgit v1.2.3 From e274795ea7b7caa0fd74ef651594382a69e2a951 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Jan 2017 14:17:48 +0100 Subject: locking/mutex: Fix mutex handoff While reviewing the ww_mutex patches, I noticed that it was still possible to (incorrectly) succeed for (incorrect) code like: mutex_lock(&a); mutex_lock(&a); This was possible if the second mutex_lock() would block (as expected) but then receive a spurious wakeup. At that point it would find itself at the front of the queue, request a handoff and instantly claim ownership and continue, since owner would point to itself. Avoid this scenario and simplify the code by introducing a third low bit to signal handoff pickup. So once we request handoff, unlock clears the handoff bit and sets the pickup bit along with the new owner. This also removes the need for the .handoff argument to __mutex_trylock(), since that becomes superfluous with PICKUP. In order to guarantee enough low bits, ensure task_struct alignment is at least L1_CACHE_BYTES (which seems a good ideal regardless). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9d659ae14b54 ("locking/mutex: Add lock handoff to avoid starvation") Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b97870f2debd..3e1fccb47f11 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -65,7 +65,7 @@ struct mutex { static inline struct task_struct *__mutex_owner(struct mutex *lock) { - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07); } /* -- cgit v1.2.3 From ea9e0fb8fe1bdfca81bd76052a5cce70bb053430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:32 +0100 Subject: locking/ww_mutex: Set use_ww_ctx even when locking without a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We will add a new field to struct mutex_waiter. This field must be initialized for all waiters if any waiter uses the ww_use_ctx path. So there is a trade-off: Keep ww_mutex locking without a context on the faster non-use_ww_ctx path, at the cost of adding the initialization to all mutex locks (including non-ww_mutexes), or avoid the additional cost for non-ww_mutex locks, at the cost of adding additional checks to the use_ww_ctx path. We take the latter choice. It may be worth eliminating the users of ww_mutex_lock(lock, NULL), but there are a lot of them. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-5-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 7b0066814fa0..5f2e8379baff 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -222,11 +222,7 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, */ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock(lock, ctx); - - mutex_lock(&lock->base); - return 0; + return __ww_mutex_lock(lock, ctx); } /** @@ -262,10 +258,7 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - if (ctx) - return __ww_mutex_lock_interruptible(lock, ctx); - else - return mutex_lock_interruptible(&lock->base); + return __ww_mutex_lock_interruptible(lock, ctx); } /** -- cgit v1.2.3 From c5470b22d1833241cae996d23a8a346ff8ec4d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:33 +0100 Subject: locking/ww_mutex: Remove the __ww_mutex_lock*() inline wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the documentation in the header file since there is no good place for it in mutex.c: there are two rather different implementations with different EXPORT_SYMBOLs for each function. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-6-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 5f2e8379baff..c07528522bbc 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -186,11 +186,6 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) #endif } -extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); -extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); - /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired @@ -220,10 +215,8 @@ extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock(lock, ctx); -} +extern int __must_check ww_mutex_lock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible @@ -255,11 +248,8 @@ static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ct * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - return __ww_mutex_lock_interruptible(lock, ctx); -} +extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex -- cgit v1.2.3 From 6baa5c60a97d7f1a37a4b5ab5fb3a450e56e8b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:34 +0100 Subject: locking/ww_mutex: Add waiters in stamp order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add regular waiters in stamp order. Keep adding waiters that have no context in FIFO order and take care not to starve them. While adding our task as a waiter, back off if we detect that there is a waiter with a lower stamp in front of us. Make sure to call lock_contended even when we back off early. For w/w mutexes, being first in the wait list is only stable when taking the lock without a context. Therefore, the purpose of the first flag is split into two: 'first' remains to indicate whether we want to spin optimistically, while 'handoff' indicates that we should be prepared to accept a handoff. For w/w locking with a context, we always accept handoffs after the first schedule(), to handle the following sequence of events: 1. Task #0 unlocks and hands off to Task #2 which is first in line 2. Task #1 adds itself in front of Task #2 3. Task #2 wakes up and must accept the handoff even though it is no longer first in line Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-7-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3e1fccb47f11..e17942ffb3fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -20,6 +20,8 @@ #include #include +struct ww_acquire_ctx; + /* * Simple, straightforward mutexes with strict semantics: * @@ -75,6 +77,7 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock) struct mutex_waiter { struct list_head list; struct task_struct *task; + struct ww_acquire_ctx *ww_ctx; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif -- cgit v1.2.3 From 977625a693283dbb35b2a3f674bc0237f7347348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 21 Dec 2016 19:46:39 +0100 Subject: locking/mutex: Initialize mutex_waiter::ww_ctx with poison when debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Help catch cases where mutex_lock is used directly on w/w mutexes, which otherwise result in the w/w tasks reading uninitialized data. Signed-off-by: Nicolai Hähnle Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Link: http://lkml.kernel.org/r/1482346000-9927-12-git-send-email-nhaehnle@gmail.com Signed-off-by: Ingo Molnar --- include/linux/poison.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/poison.h b/include/linux/poison.h index 51334edec506..a39540326417 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -80,6 +80,7 @@ /********** kernel/mutexes **********/ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 +#define MUTEX_POISON_WW_CTX ((void *) 0x500 + POISON_POINTER_DELTA) /********** lib/flex_array.c **********/ #define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ -- cgit v1.2.3 From 12907fbb1a691807bb0420a27126e15934cb7954 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Dec 2016 11:44:28 +0100 Subject: sched/clock, clocksource: Add optional cs::mark_unstable() method PeterZ reported that we'd fail to mark the TSC unstable when the clocksource watchdog finds it unsuitable. Allow a clocksource to run a custom action when its being marked unstable and hook up the TSC unstable code. Reported-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e315d04a2fd9..cfc75848a35d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -62,6 +62,8 @@ struct module; * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @mark_unstable: Optional function to inform the clocksource driver that + * the watchdog marked the clocksource unstable * @owner: module reference, must be set by clocksource in modules * * Note: This struct is not used in hotpathes of the timekeeping code @@ -93,6 +95,7 @@ struct clocksource { unsigned long flags; void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + void (*mark_unstable)(struct clocksource *cs); /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG -- cgit v1.2.3 From 9881b024b7d7671f6a014091bc96506b89081802 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Dec 2016 13:35:52 +0100 Subject: sched/clock: Delay switching sched_clock to stable Currently we switch to the stable sched_clock if we guess the TSC is usable, and then switch back to the unstable path if it turns out TSC isn't stable during SMP bringup after all. Delay switching to the stable path until after SMP bringup is complete. This way we'll avoid switching during the time we detect the worst of the TSC offences. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..94a48bb58297 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2515,6 +2515,10 @@ extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + static inline void sched_clock_tick(void) { } @@ -2537,6 +2541,7 @@ static inline u64 local_clock(void) return sched_clock(); } #else +extern void sched_clock_init_late(void); /* * Architectures can set this to 1 if they have specified * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, -- cgit v1.2.3 From 10ab56434f2f633a51e432ee8b7c29e12438e163 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Oct 2016 12:58:10 -0400 Subject: sched/core: Separate out io_schedule_prepare() and io_schedule_finish() Now that IO schedule accounting is done inside __schedule(), io_schedule() can be split into three steps - prep, schedule, and finish - where the schedule part doesn't need any special annotation. This allows marking a sleep as iowait by simply wrapping an existing blocking function with io_schedule_prepare() and io_schedule_finish(). Because task_struct->in_iowait is single bit, the caller of io_schedule_prepare() needs to record and the pass its state to io_schedule_finish() to be safe regarding nesting. While this isn't the prettiest, these functions are mostly gonna be used by core functions and we don't want to use more space for ->in_iowait. While at it, as it's simple to do now, reimplement io_schedule() without unnecessarily going through io_schedule_timeout(). Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1477673892-28940-3-git-send-email-tj@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 94a48bb58297..a8daed914eef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -461,12 +461,10 @@ extern signed long schedule_timeout_idle(signed long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); +extern int __must_check io_schedule_prepare(void); +extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); - -static inline void io_schedule(void) -{ - io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); -} +extern void io_schedule(void); void __noreturn do_task_dead(void); -- cgit v1.2.3 From 1460cb65a10f6c7a6e3a1c76513338861a0a43b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 28 Oct 2016 12:58:11 -0400 Subject: locking/mutex, sched/wait: Add mutex_lock_io() We sometimes end up propagating IO blocking through mutexes; however, because there currently is no way of annotating mutex sleeps as iowait, there are cases where iowait and /proc/stat:procs_blocked report misleading numbers obscuring the actual state of the system. This patch adds mutex_lock_io() so that mutex sleeps can be marked as iowait in those cases. Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1477673892-28940-4-git-send-email-tj@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index b97870f2debd..980ba16b8749 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,10 +156,12 @@ extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass); +extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ do { \ @@ -171,11 +173,13 @@ do { \ extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); extern int __must_check mutex_lock_killable(struct mutex *lock); +extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) +# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) #endif /* -- cgit v1.2.3 From 9e3d6223d2093a8903c8f570a06284453ee59944 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Dec 2016 09:30:11 +0100 Subject: math64, timers: Fix 32bit mul_u64_u32_shr() and friends It turns out that while GCC-4.4 manages to generate 32x32->64 mult instructions for the 32bit mul_u64_u32_shr() code, any GCC after that fails horribly. Fix this by providing an explicit mul_u32_u32() function which can be architcture provided. Reported-by: Chris Metcalf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf [for tile] Cc: Christopher S. Hall Cc: David Gibson Cc: John Stultz Cc: Laurent Vivier Cc: Liav Rehana Cc: Linus Torvalds Cc: Parit Bhargava Cc: Peter Zijlstra Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161209083011.GD15765@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/math64.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 6e8b5b270ffe..80690c96c734 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -133,6 +133,16 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#ifndef mul_u32_u32 +/* + * Many a GCC version messes this up and generates a 64x64 mult :-( + */ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr @@ -160,9 +170,9 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) al = a; ah = a >> 32; - ret = ((u64)al * mul) >> shift; + ret = mul_u32_u32(al, mul) >> shift; if (ah) - ret += ((u64)ah * mul) << (32 - shift); + ret += mul_u32_u32(ah, mul) << (32 - shift); return ret; } @@ -186,10 +196,10 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) a0.ll = a; b0.ll = b; - rl.ll = (u64)a0.l.low * b0.l.low; - rm.ll = (u64)a0.l.low * b0.l.high; - rn.ll = (u64)a0.l.high * b0.l.low; - rh.ll = (u64)a0.l.high * b0.l.high; + rl.ll = mul_u32_u32(a0.l.low, b0.l.low); + rm.ll = mul_u32_u32(a0.l.low, b0.l.high); + rn.ll = mul_u32_u32(a0.l.high, b0.l.low); + rh.ll = mul_u32_u32(a0.l.high, b0.l.high); /* * Each of these lines computes a 64-bit intermediate result into "c", @@ -229,8 +239,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } u, rl, rh; u.ll = a; - rl.ll = (u64)u.l.low * mul; - rh.ll = (u64)u.l.high * mul + rl.l.high; + rl.ll = mul_u32_u32(u.l.low, mul); + rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high; /* Bits 32-63 of the result will be in rh.l.low. */ rl.l.high = do_div(rh.ll, divisor); -- cgit v1.2.3 From af2e859edd477fa1ea3d1d106f41a595cff3d162 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Dec 2016 11:47:04 +0000 Subject: locking/ww_mutex: Fix compilation of __WW_MUTEX_INITIALIZER From conflicting macro parameters, passing the wrong name to __MUTEX_INITIALIZER and a stray '\', #define __WW_MUTEX_INITIALIZER was very unhappy. One unnecessary change was to choose to pass &ww_class instead of implicitly taking the address of the class within the macro. Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1b375dc30710 ("mutex: Move ww_mutex definitions to ww_mutex.h") Link: http://lkml.kernel.org/r/20161201114711.28697-2-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index c07528522bbc..083950fd5b0b 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -51,10 +51,10 @@ struct ww_mutex { }; #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ - , .ww_class = &ww_class +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ + , .ww_class = class #else -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif #define __WW_CLASS_INITIALIZER(ww_class) \ @@ -63,7 +63,7 @@ struct ww_mutex { , .mutex_name = #ww_class "_mutex" } #define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = { \__MUTEX_INITIALIZER(lockname) } \ + { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } #define DEFINE_WW_CLASS(classname) \ -- cgit v1.2.3 From 1e24edca0557dba6486d39d3c24c288475432bcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:12:23 +0100 Subject: locking/atomic, kref: Add KREF_INIT() Since we need to change the implementation, stop exposing internals. Provide KREF_INIT() to allow static initialization of struct kref. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index e15828fd71f1..9af255ad1e2f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -24,6 +24,8 @@ struct kref { atomic_t refcount; }; +#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } + /** * kref_init - initialize object. * @kref: object in question. -- cgit v1.2.3 From 2c935bc57221cc2edc787c72ea0e2d30cdcd3d5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:29:48 +0100 Subject: locking/atomic, kref: Add kref_read() Since we need to change the implementation, stop exposing internals. Provide kref_read() to read the current reference count; typically used for debug messages. Kills two anti-patterns: atomic_read(&kref->refcount) kref->refcount.counter Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 5 +++++ include/linux/sunrpc/cache.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 9af255ad1e2f..7c88d865f82f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -35,6 +35,11 @@ static inline void kref_init(struct kref *kref) atomic_set(&kref->refcount, 1); } +static inline int kref_read(const struct kref *kref) +{ + return atomic_read(&kref->refcount); +} + /** * kref_get - increment refcount for object. * @kref: object. diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..8a511c0985aa 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -198,7 +198,7 @@ static inline struct cache_head *cache_get(struct cache_head *h) static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->ref.refcount) <= 2 && + if (kref_read(&h->ref) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; kref_put(&h->ref, cd->cache_put); -- cgit v1.2.3 From bdfafc4ffdd24e491119d81f85ddc4393fa49803 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 17:34:19 +0100 Subject: locking/atomic, kref: Kill kref_sub() By general sentiment kref_sub() is a bad interface, make it go away. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 7c88d865f82f..31c49a64cdf9 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -54,9 +54,8 @@ static inline void kref_get(struct kref *kref) } /** - * kref_sub - subtract a number of refcounts for object. + * kref_put - decrement refcount for object. * @kref: object. - * @count: Number of recounts to subtract. * @release: pointer to the function that will clean up the object when the * last reference to the object is released. * This pointer is required, and it is not acceptable to pass kfree @@ -65,46 +64,23 @@ static inline void kref_get(struct kref *kref) * maintainer, and anyone else who happens to notice it. You have * been warned. * - * Subtract @count from the refcount, and if 0, call release(). + * Decrement the refcount, and if 0, call release(). * Return 1 if the object was removed, otherwise return 0. Beware, if this * function returns 0, you still can not count on the kref from remaining in * memory. Only use the return value if you want to see if the kref is now * gone, not present. */ -static inline int kref_sub(struct kref *kref, unsigned int count, - void (*release)(struct kref *kref)) +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { WARN_ON(release == NULL); - if (atomic_sub_and_test((int) count, &kref->refcount)) { + if (atomic_dec_and_test(&kref->refcount)) { release(kref); return 1; } return 0; } -/** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the - * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. If the caller does pass kfree to this - * function, you will be publicly mocked mercilessly by the kref - * maintainer, and anyone else who happens to notice it. You have - * been warned. - * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. - */ -static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) -{ - return kref_sub(kref, 1, release); -} - static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) -- cgit v1.2.3 From 23b19ec3377924eb8f303880102e056e9214ba72 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 12:11:59 +0100 Subject: locking/ww_mutex: Turn off __must_check for now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the ww_mutex inline wrappers gone there's a lot of dormant anti-patterns emerging in an x86 allyesconfig build: kernel/locking/test-ww_mutex.c:80:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:55:3: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:134:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:213:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:177:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] kernel/locking/test-ww_mutex.c:266:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:213:19: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] lib/locking-selftest.c:211:20: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/drm_modeset_lock.c:430:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/vgem/vgem_fence.c:193:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_batch_pool.c:125:4: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/i915/i915_gem_execbuffer.c:1302:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/radeon/radeon_prime.c:69:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] drivers/gpu/drm/nouveau/nouveau_prime.c:70:2: warning: ignoring return value of ‘ww_mutex_lock’, declared with attribute warn_unused_result [-Wunused-result] ... but we cannot just litter the kernel build log with such warnings. These need to be fixed separately - turn off the warning for now. Cc: Nicolai Hähnle Cc: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Daniel Vetter Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/ww_mutex.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 083950fd5b0b..5dd9a7682227 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -215,8 +215,7 @@ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) * * A mutex acquired with this function must be released with ww_mutex_unlock. */ -extern int __must_check ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); +extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible -- cgit v1.2.3 From f21860bac05b609d71757338361d26209ff0759b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 17:11:36 +0100 Subject: locking/mutex, sched/wait: Fix the mutex_lock_io_nested() define Mike noticed this bogosity: > > +# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) > ^^^^^^^^^^^^^^ typo This new locking API is not used yet, so this didn't trigger in testing. Fix it. Reported-by: Mike Galbraith Cc: Tejun Heo Cc: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Jens Axboe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adilger.kernel@dilger.ca Cc: jack@suse.com Cc: kernel-team@fb.com Cc: mingbo@fb.com Cc: tytso@mit.edu Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 980ba16b8749..7fffbfcd5430 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -179,7 +179,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -# define mutex_lock_nest_io(lock, nest_lock) mutex_io(lock) +# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock) #endif /* -- cgit v1.2.3 From e12799125074c67cd30bc3d7a5ff5e2f29ea1411 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 22 Nov 2016 19:03:09 +0200 Subject: firmware: qcom: scm: Add empty functions to help compile testing This will help to compile testing drivers which depends on scm functions with COMPILE_TEST Kconfig option. Signed-off-by: Stanimir Varbanov Reviewed-by: Stephen Boyd Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 50 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index cc32ab852fbc..7e004fb57fc4 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -13,9 +13,9 @@ #ifndef __QCOM_SCM_H #define __QCOM_SCM_H -extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); -extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); - +#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) +#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 +#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 #define QCOM_SCM_HDCP_MAX_REQ_CNT 5 struct qcom_scm_hdcp_req { @@ -23,27 +23,47 @@ struct qcom_scm_hdcp_req { u32 val; }; +#if IS_ENABLED(CONFIG_QCOM_SCM) +extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); +extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); extern bool qcom_scm_is_available(void); - extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - + u32 *resp); extern bool qcom_scm_pas_supported(u32 peripheral); extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size); + size_t size); extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); + phys_addr_t size); extern int qcom_scm_pas_auth_and_reset(u32 peripheral); extern int qcom_scm_pas_shutdown(u32 peripheral); - -#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 -#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 - extern void qcom_scm_cpu_power_down(u32 flags); - -#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) - extern u32 qcom_scm_get_version(void); +#else +static inline +int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) +{ + return -ENODEV; +} +static inline +int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus) +{ + return -ENODEV; +} +static inline bool qcom_scm_is_available(void) { return false; } +static inline bool qcom_scm_hdcp_available(void) { return false; } +static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, + u32 *resp) { return -ENODEV; } +static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; } +static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, + size_t size) { return -ENODEV; } +static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, + phys_addr_t size) { return -ENODEV; } +static inline int +qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; } +static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } +static inline void qcom_scm_cpu_power_down(u32 flags) {} +static inline u32 qcom_scm_get_version(void) { return 0; } +#endif #endif -- cgit v1.2.3 From a811b420b6c13759540070c0e9541b7cd8569168 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Mon, 16 Jan 2017 23:24:15 -0600 Subject: firmware: qcom_scm: Add set remote state API This patch adds a set remote state SCM API. This will be used by the Venus and GPU subsystems to set state on the remote processors. This work was based on two patch sets by Jordan Crouse and Stanimir Varbanov. Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 7e004fb57fc4..d32f6f1a5225 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -39,6 +39,7 @@ extern int qcom_scm_pas_auth_and_reset(u32 peripheral); extern int qcom_scm_pas_shutdown(u32 peripheral); extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); +extern int qcom_scm_set_remote_state(u32 state, u32 id); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -64,6 +65,7 @@ qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; } static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } static inline void qcom_scm_cpu_power_down(u32 flags) {} static inline u32 qcom_scm_get_version(void) { return 0; } +static inline u32 +qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } #endif - #endif -- cgit v1.2.3 From 5fa23530d4fcc7e84be9a557c58d0e670a15c042 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 16 Jan 2017 10:40:43 +0000 Subject: of: base: add support to find the level of the last cache It is useful to have helper function just to get the number of cache levels for a given logical cpu. We can obtain the same by just checking the level at which the last cache is present. This patch adds support to find the level of the last cache for a given cpu. It will be used on ARM64 platform where the device tree provides the information for the additional non-architected/transparent/external last level caches that are not integrated with the processors. Cc: Mark Rutland Suggested-by: Rob Herring Acked-by: Rob Herring Tested-by: Tan Xiaojun Signed-off-by: Sudeep Holla [will: use u32 instead of int for cache_level] Signed-off-by: Will Deacon --- include/linux/of.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index d72f01009297..21e6323de0f3 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -280,6 +280,7 @@ extern struct device_node *of_get_child_by_name(const struct device_node *node, /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); +extern int of_find_last_cache_level(unsigned int cpu); extern struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name); -- cgit v1.2.3 From c51ca6cf545bc51ad38bd50816bde37c647d608d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 10 Dec 2016 15:13:59 -0700 Subject: block: move existing elevator ops to union Prep patch for adding MQ ops as well, since doing anon unions with named initializers doesn't work on older compilers. Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- include/linux/elevator.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b276e9ef0e0b..2a9e966eed03 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -94,7 +94,9 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - struct elevator_ops ops; + union { + struct elevator_ops sq; + } ops; size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; -- cgit v1.2.3 From 16a3c2a70cad5ccdc2dc0a4544bff82554807493 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Dec 2016 14:27:46 -0700 Subject: blk-mq: un-export blk_mq_free_hctx_request() It's only used in blk-mq, kill it from the main exported header and kill the symbol export as well. Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index afc81d77e471..2686f9e7302a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -181,7 +181,6 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_free_request(struct request *rq); -void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); enum { -- cgit v1.2.3 From fd2d332677c687ca90c12a47d6c377c547100b56 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 12 Jan 2017 10:04:45 -0700 Subject: blk-mq: add support for carrying internal tag information in blk_qc_t No functional change in this patch, just in preparation for having two types of tags available to the block layer for a single request. Signed-off-by: Jens Axboe Reviewed-by: Omar Sandoval --- include/linux/blk_types.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 519ea2c9df61..0e5b1cd5113c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -232,22 +232,29 @@ static inline bool op_is_sync(unsigned int op) } typedef unsigned int blk_qc_t; -#define BLK_QC_T_NONE -1U -#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 +#define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, + bool internal) { - return tag | (queue_num << BLK_QC_T_SHIFT); + blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); + + if (internal) + ret |= BLK_QC_T_INTERNAL; + + return ret; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { - return cookie >> BLK_QC_T_SHIFT; + return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; } static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) @@ -255,6 +262,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } +static inline bool blk_qc_t_is_internal(blk_qc_t cookie) +{ + return (cookie & BLK_QC_T_INTERNAL) != 0; +} + struct blk_issue_stat { u64 time; }; -- cgit v1.2.3 From bd166ef183c263c5ced656d49ef19c7da4adc774 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jan 2017 06:03:22 -0700 Subject: blk-mq-sched: add framework for MQ capable IO schedulers This adds a set of hooks that intercepts the blk-mq path of allocating/inserting/issuing/completing requests, allowing us to develop a scheduler within that framework. We reuse the existing elevator scheduler API on the registration side, but augment that with the scheduler flagging support for the blk-mq interfce, and with a separate set of ops hooks for MQ devices. We split driver and scheduler tags, so we can run the scheduling independently of device queue depth. Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- include/linux/blk-mq.h | 5 ++++- include/linux/blkdev.h | 4 +++- include/linux/elevator.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2686f9e7302a..63569eb46d15 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -22,6 +22,7 @@ struct blk_mq_hw_ctx { unsigned long flags; /* BLK_MQ_F_* flags */ + void *sched_data; struct request_queue *queue; struct blk_flush_queue *fq; @@ -35,6 +36,7 @@ struct blk_mq_hw_ctx { atomic_t wait_index; struct blk_mq_tags *tags; + struct blk_mq_tags *sched_tags; struct srcu_struct queue_rq_srcu; @@ -156,6 +158,7 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, + BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_MAX_DEPTH = 10240, @@ -179,13 +182,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); enum { BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ + BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ }; struct request *blk_mq_alloc_request(struct request_queue *q, int rw, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2e99d659b0f1..25564857f5f8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -154,6 +154,7 @@ struct request { /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ + int tag; sector_t __sector; /* sector cursor */ struct bio *bio; @@ -220,9 +221,10 @@ struct request { unsigned short ioprio; + int internal_tag; + void *special; /* opaque pointer available for LLD use */ - int tag; int errors; /* diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2a9e966eed03..ecb96fd67c6d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -77,6 +77,34 @@ struct elevator_ops elevator_registered_fn *elevator_registered_fn; }; +struct blk_mq_alloc_data; +struct blk_mq_hw_ctx; + +struct elevator_mq_ops { + int (*init_sched)(struct request_queue *, struct elevator_type *); + void (*exit_sched)(struct elevator_queue *); + + bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); + bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); + int (*request_merge)(struct request_queue *q, struct request **, struct bio *); + void (*request_merged)(struct request_queue *, struct request *, int); + void (*requests_merged)(struct request_queue *, struct request *, struct request *); + struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); + void (*put_request)(struct request *); + void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); + void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *); + bool (*has_work)(struct blk_mq_hw_ctx *); + void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); + void (*started_request)(struct request *); + void (*requeue_request)(struct request *); + struct request *(*former_request)(struct request_queue *, struct request *); + struct request *(*next_request)(struct request_queue *, struct request *); + int (*get_rq_priv)(struct request_queue *, struct request *); + void (*put_rq_priv)(struct request_queue *, struct request *); + void (*init_icq)(struct io_cq *); + void (*exit_icq)(struct io_cq *); +}; + #define ELV_NAME_MAX (16) struct elv_fs_entry { @@ -96,12 +124,14 @@ struct elevator_type /* fields provided by elevator implementation */ union { struct elevator_ops sq; + struct elevator_mq_ops mq; } ops; size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; + bool uses_mq; /* managed by elevator core */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ @@ -125,6 +155,7 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; + unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; @@ -141,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *, extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); +extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -- cgit v1.2.3 From d34849913819a5e0cbfbe724dbe79df89278c524 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jan 2017 14:43:58 -0700 Subject: blk-mq-sched: allow setting of default IO scheduler Add Kconfig entries to manage what devices get assigned an MQ scheduler, and add a blk-mq flag for drivers to opt out of scheduling. The latter is useful for admin type queues that still allocate a blk-mq queue and tag set, but aren't use for normal IO. Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 63569eb46d15..8e4df3d6c8cd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -153,6 +153,7 @@ enum { BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, + BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, -- cgit v1.2.3 From 501db511397fd6efff3aa5b4e8de415b55559550 Mon Sep 17 00:00:00 2001 From: Rolf Neugebauer Date: Tue, 17 Jan 2017 18:13:51 +0000 Subject: virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit This patch part reverts fd2a0437dc33 and e858fae2b0b8 which introduced a subtle change in how the virtio_net flags are derived from the SKBs ip_summed field. With the above commits, the flags are set to VIRTIO_NET_HDR_F_DATA_VALID when ip_summed == CHECKSUM_UNNECESSARY, thus treating it differently to ip_summed == CHECKSUM_NONE, which should be the same. Further, the virtio spec 1.0 / CS04 explicitly says that VIRTIO_NET_HDR_F_DATA_VALID must not be set by the driver. Fixes: fd2a0437dc33 ("virtio_net: introduce virtio_net_hdr_{from,to}_skb") Fixes: e858fae2b0b8 (" virtio_net: use common code for virtio_net_hdr and skb GSO conversion") Signed-off-by: Rolf Neugebauer Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 66204007d7ac..56436472ccc7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -91,8 +91,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); - } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; -- cgit v1.2.3 From d45ae1f7041ac52ade6c5ec76d96bbed765d67aa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Jan 2017 12:29:35 -0500 Subject: tracing: Process constants for (un)likely() profiler When running the likely/unlikely profiler, one of the results did not look accurate. It noted that the unlikely() in link_path_walk() was 100% incorrect. When I added a trace_printk() to see what was happening there, it became 80% correct! Looking deeper into what whas happening, I found that gcc split that if statement into two paths. One where the if statement became a constant, the other path a variable. The other path had the if statement always hit (making the unlikely there, always false), but since the #define unlikely() has: #define unlikely() (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) Where constants are ignored by the branch profiler, the "constant" path made by the compiler was ignored, even though it was hit 80% of the time. By just passing the constant value to the __branch_check__() function and tracing it out of line (as always correct, as likely/unlikely isn't a factor for constants), then we get back the accurate readings of branches that were optimized by gcc causing part of the execution to become constant. Signed-off-by: Steven Rostedt (VMware) --- include/linux/compiler.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index cf0fa5d86059..bbbe1570de1c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -107,12 +107,13 @@ struct ftrace_branch_data { */ #if defined(CONFIG_TRACE_BRANCH_PROFILING) \ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__) -void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); +void ftrace_likely_update(struct ftrace_branch_data *f, int val, + int expect, int is_constant); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) -#define __branch_check__(x, expect) ({ \ +#define __branch_check__(x, expect, is_constant) ({ \ int ______r; \ static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ @@ -122,8 +123,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, expect); \ + ______r = __builtin_expect(!!(x), expect); \ + ftrace_likely_update(&______f, ______r, \ + expect, is_constant); \ ______r; \ }) @@ -133,10 +135,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); * written by Daniel Walker. */ # ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) +# define likely(x) (__branch_check__(x, 1, __builtin_constant_p(x))) # endif # ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) +# define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x))) # endif #ifdef CONFIG_PROFILE_ALL_BRANCHES -- cgit v1.2.3 From 0a13cd1a05e7a549259d4f803d2ec2efda07ed7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:03:11 +0100 Subject: locking/atomic, kref: Implement kref_put_lock() Because home-rolling your own is _awesome_, stop doing it. Provide kref_put_lock(), just like kref_put_mutex() but for a spinlock. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kref.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 31c49a64cdf9..9db9685fed84 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -19,6 +19,7 @@ #include #include #include +#include struct kref { atomic_t refcount; @@ -86,12 +87,21 @@ static inline int kref_put_mutex(struct kref *kref, struct mutex *lock) { WARN_ON(release == NULL); - if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { - mutex_lock(lock); - if (unlikely(!atomic_dec_and_test(&kref->refcount))) { - mutex_unlock(lock); - return 0; - } + + if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) { + release(kref); + return 1; + } + return 0; +} + +static inline int kref_put_lock(struct kref *kref, + void (*release)(struct kref *kref), + spinlock_t *lock) +{ + WARN_ON(release == NULL); + + if (atomic_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } -- cgit v1.2.3 From 84d58132d285b2edef951d6633c1e5224e8b5283 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 11 Jan 2017 06:35:10 -0800 Subject: rpmsg: Introduce "poll" to endpoint ops This allows rpmsg backends to implement polling of the outgoing buffer, which provides poll support to user space when using the rpmsg character device. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 18f9e1ae4b7e..10d6ae8bbb7d 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -41,6 +41,7 @@ #include #include #include +#include #define RPMSG_ADDR_ANY 0xFFFFFFFF @@ -156,6 +157,9 @@ int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, void *data, int len); +unsigned int rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, + poll_table *wait); + #else static inline int register_rpmsg_device(struct rpmsg_device *dev) @@ -254,6 +258,15 @@ static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, return -ENXIO; } +static inline unsigned int rpmsg_poll(struct rpmsg_endpoint *ept, + struct file *filp, poll_table *wait) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return 0; +} + #endif /* IS_ENABLED(CONFIG_RPMSG) */ /* use a macro to avoid include chaining to get THIS_MODULE */ -- cgit v1.2.3 From cc16f00f6529aa2378f2b949a6f68e9dc6dec363 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Jan 2017 00:44:42 +0800 Subject: sctp: add support for generating stream reconf ssn reset request chunk This patch is to add asoc strreset_outseq and strreset_inseq for saving the reconf request sequence, initialize them when create assoc and process init, and also to define Incoming and Outgoing SSN Reset Request Parameter described in rfc6525 section 4.1 and 4.2, As they can be in one same chunk as section rfc6525 3.1-3 describes, it makes them in one function. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index fcb4c3646173..a9e790685af3 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -108,6 +108,7 @@ typedef enum { /* Use hex, as defined in ADDIP sec. 3.1 */ SCTP_CID_ASCONF = 0xC1, SCTP_CID_ASCONF_ACK = 0x80, + SCTP_CID_RECONF = 0x82, } sctp_cid_t; /* enum */ @@ -199,6 +200,13 @@ typedef enum { SCTP_PARAM_SUCCESS_REPORT = cpu_to_be16(0xc005), SCTP_PARAM_ADAPTATION_LAYER_IND = cpu_to_be16(0xc006), + /* RE-CONFIG. Section 4 */ + SCTP_PARAM_RESET_OUT_REQUEST = cpu_to_be16(0x000d), + SCTP_PARAM_RESET_IN_REQUEST = cpu_to_be16(0x000e), + SCTP_PARAM_RESET_TSN_REQUEST = cpu_to_be16(0x000f), + SCTP_PARAM_RESET_RESPONSE = cpu_to_be16(0x0010), + SCTP_PARAM_RESET_ADD_OUT_STREAMS = cpu_to_be16(0x0011), + SCTP_PARAM_RESET_ADD_IN_STREAMS = cpu_to_be16(0x0012), } sctp_param_t; /* enum */ @@ -710,4 +718,23 @@ struct sctp_infox { struct sctp_association *asoc; }; +struct sctp_reconf_chunk { + sctp_chunkhdr_t chunk_hdr; + __u8 params[0]; +} __packed; + +struct sctp_strreset_outreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u32 response_seq; + __u32 send_reset_at_tsn; + __u16 list_of_streams[0]; +} __packed; + +struct sctp_strreset_inreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u16 list_of_streams[0]; +} __packed; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From c61f13eaa1ee17728c41370100d2d45c254ce76f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Jan 2017 11:14:39 -0800 Subject: gcc-plugins: Add structleak for more stack initialization This plugin detects any structures that contain __user attributes and makes sure it is being fully initialized so that a specific class of information exposure is eliminated. (This plugin was originally designed to block the exposure of siginfo in CVE-2013-2141.) Ported from grsecurity/PaX. This version adds a verbose option to the plugin and the Kconfig. Signed-off-by: Kees Cook --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index cf0fa5d86059..91c30cba984e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -27,7 +27,11 @@ extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); # define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) #else /* __CHECKER__ */ -# define __user +# ifdef STRUCTLEAK_PLUGIN +# define __user __attribute__((user)) +# else +# define __user +# endif # define __kernel # define __safe # define __force -- cgit v1.2.3 From 62bc306e2083436675e33b5bdeb6a77907d35971 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 17 Jan 2017 11:07:15 -0500 Subject: audit: log 32-bit socketcalls 32-bit socketcalls were not being logged by audit on x86_64 systems. Log them. This is basically a duplicate of the call from net/socket.c:sys_socketcall(), but it addresses the impedance mismatch between 32-bit userspace process and 64-bit kernel audit. See: https://github.com/linux-audit/audit-kernel/issues/14 Signed-off-by: Richard Guy Briggs Acked-by: David S. Miller Signed-off-by: Paul Moore --- include/linux/audit.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d4443f93db6..2be99b276d29 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -387,6 +387,20 @@ static inline int audit_socketcall(int nargs, unsigned long *args) return __audit_socketcall(nargs, args); return 0; } + +static inline int audit_socketcall_compat(int nargs, u32 *args) +{ + unsigned long a[AUDITSC_ARGS]; + int i; + + if (audit_dummy_context()) + return 0; + + for (i = 0; i < nargs; i++) + a[i] = (unsigned long)args[i]; + return __audit_socketcall(nargs, a); +} + static inline int audit_sockaddr(int len, void *addr) { if (unlikely(!audit_dummy_context())) @@ -513,6 +527,12 @@ static inline int audit_socketcall(int nargs, unsigned long *args) { return 0; } + +static inline int audit_socketcall_compat(int nargs, u32 *args) +{ + return 0; +} + static inline void audit_fd_pair(int fd1, int fd2) { } static inline int audit_sockaddr(int len, void *addr) -- cgit v1.2.3 From fd61c6ba313de6758aeeab58fe03bd9fbbc8cea9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 17 Jan 2017 15:51:07 -0800 Subject: net: ipv6: remove nowait arg to rt6_fill_node All callers of rt6_fill_node pass 0 for nowait arg. Remove the arg and simplify rt6_fill_node accordingly. rt6_fill_node passes the nowait of 0 to ip6mr_get_route. Remove the nowait arg from it as well. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 19a1c0c2993b..ce44e3e96d27 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -116,7 +116,7 @@ struct mfc6_cache { struct rtmsg; extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, - struct rtmsg *rtm, int nowait, u32 portid); + struct rtmsg *rtm, u32 portid); #ifdef CONFIG_IPV6_MROUTE extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); -- cgit v1.2.3 From d407bd25a204bd66b7346dde24bd3d37ef0e0b05 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 Jan 2017 15:14:17 +0100 Subject: bpf: don't trigger OOM killer under pressure with map alloc This patch adds two helpers, bpf_map_area_alloc() and bpf_map_area_free(), that are to be used for map allocations. Using kmalloc() for very large allocations can cause excessive work within the page allocator, so i) fall back earlier to vmalloc() when the attempt is considered costly anyway, and even more importantly ii) don't trigger OOM killer with any of the allocators. Since this is based on a user space request, for example, when creating maps with element pre-allocation, we really want such requests to fail instead of killing other user space processes. Also, don't spam the kernel log with warnings should any of the allocations fail under pressure. Given that, we can make backend selection in bpf_map_area_alloc() generic, and convert all maps over to use this API for spots with potentially large allocation requests. Note, replacing the one kmalloc_array() is fine as overflow checks happen earlier in htab_map_alloc(), since it must also protect the multiplication for vmalloc() should kmalloc_array() fail. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05cf951df3fe..3ed1f3b1d594 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -247,6 +247,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); +void *bpf_map_area_alloc(size_t size); +void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From 4a7c972644c1151f6dd34ff4b5f7eacb239e22ee Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 18 Jan 2017 17:45:01 +0100 Subject: net: Remove usage of net_device last_rx member The network stack no longer uses the last_rx member of struct net_device since the bonding driver switched to use its own private last_rx in commit 9f242738376d ("bonding: use last_arp_rx in slave_last_rx()"). However, some drivers still (ab)use the field for their own purposes and some driver just update it without actually using it. Previously, there was an accompanying comment for the last_rx member added in commit 4dc89133f49b ("net: add a comment on netdev->last_rx") which asked drivers not to update is, unless really needed. However, this commend was removed in commit f8ff080dacec ("bonding: remove useless updating of slave->dev->last_rx"), so some drivers added later on still did update last_rx. Remove all usage of last_rx and switch three drivers (sky2, atp and smc91c92_cs) which actually read and write it to use their own private copy in netdev_priv. Compile-tested with allyesconfig and allmodconfig on x86 and arm. Cc: Eric Dumazet Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Mirko Lindner Cc: Stephen Hemminger Signed-off-by: Tobias Klauser Acked-by: Eric Dumazet Reviewed-by: Jay Vosburgh Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97ae0ac513ee..3868c32d98af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1551,7 +1551,6 @@ enum netdev_priv_flags { * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * - * @last_rx: Time of last Rx * @dev_addr: Hw address (before bcast, * because most packets are unicast) * @@ -1777,8 +1776,6 @@ struct net_device { /* * Cache lines mostly used on receive path (including eth_type_trans()) */ - unsigned long last_rx; - /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; -- cgit v1.2.3 From d69dece5f5b6bc7a5e39d2b6136ddc69469331fe Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 18 Jan 2017 17:09:05 -0800 Subject: LSM: Add /sys/kernel/security/lsm I am still tired of having to find indirect ways to determine what security modules are active on a system. I have added /sys/kernel/security/lsm, which contains a comma separated list of the active security modules. No more groping around in /proc/filesystems or other clever hacks. Unchanged from previous versions except for being updated to the latest security next branch. Signed-off-by: Casey Schaufler Acked-by: John Johansen Acked-by: Paul Moore Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9cf50ad2fe20..0f3c309065d7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1875,6 +1875,7 @@ struct security_hook_list { struct list_head list; struct list_head *head; union security_list_options hook; + char *lsm; }; /* @@ -1887,15 +1888,10 @@ struct security_hook_list { { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } extern struct security_hook_heads security_hook_heads; +extern char *lsm_names; -static inline void security_add_hooks(struct security_hook_list *hooks, - int count) -{ - int i; - - for (i = 0; i < count; i++) - list_add_tail_rcu(&hooks[i].list, hooks[i].head); -} +extern void security_add_hooks(struct security_hook_list *hooks, int count, + char *lsm); #ifdef CONFIG_SECURITY_SELINUX_DISABLE /* -- cgit v1.2.3 From 85b36c931ff328297572a3e6136fac573795ad79 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 18 Jan 2017 09:38:04 -0800 Subject: jump_labels: Move header guard #endif down where it belongs The ending header guard is misplaced. This has no functional change, this is just an eye-sore. Signed-off-by: Luis R. Rodriguez Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: bp@suse.de Cc: catalin.marinas@arm.com Cc: jbaron@akamai.com Cc: pbonzini@redhat.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/20170118173804.16281-1-mcgrof@kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a0547c571800..b63d6b7b0db0 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -402,6 +402,6 @@ extern bool ____wrong_branch_error(void); #define static_branch_enable(x) static_key_enable(&(x)->key) #define static_branch_disable(x) static_key_disable(&(x)->key) -#endif /* _LINUX_JUMP_LABEL_H */ - #endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_JUMP_LABEL_H */ -- cgit v1.2.3 From 739e6f5945d88dcee01590913f6886132a10c215 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 11 Jan 2017 13:37:07 +0100 Subject: gpio: provide lockdep keys for nested/unnested irqchips The helper function for adding a GPIO chip compiles in a lockdep key for debugging, the same key is needed for nested chips as well. The macro construction is unreadable, replace this with two static inlines instead. The _gpiochip_irqchip_add prefixed function is not helpful, rename it with gpiochip_irqchip_add_key() that tell us what the function is actually doing. Fixes: d245b3f9bd36 ("gpio: simplify adding threaded interrupts") Cc: Roger Quadros Reported-by: Clemens Gruber Reported-by: Roger Quadros Reported-by: Grygorii Strashko Tested-by: Clemens Gruber Tested-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 70 ++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c2748accea71..e973faba69dc 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -274,37 +274,67 @@ void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, int parent_irq); -int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, +int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type, + bool nested, + struct lock_class_key *lock_key); + +#ifdef CONFIG_LOCKDEP + +/* + * Lockdep requires that each irqchip instance be created with a + * unique key so as to avoid unnecessary warnings. This upfront + * boilerplate static inlines provides such a key for each + * unique instance. + */ +static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type) +{ + static struct lock_class_key key; + + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, false, &key); +} + +static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, - unsigned int type, - bool nested, - struct lock_class_key *lock_key); + unsigned int type) +{ + + static struct lock_class_key key; + + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, true, &key); +} +#else +static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, + struct irq_chip *irqchip, + unsigned int first_irq, + irq_flow_handler_t handler, + unsigned int type) +{ + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, false, NULL); +} -/* FIXME: I assume threaded IRQchips do not have the lockdep problem */ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int first_irq, irq_flow_handler_t handler, unsigned int type) { - return _gpiochip_irqchip_add(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, + handler, type, true, NULL); } - -#ifdef CONFIG_LOCKDEP -#define gpiochip_irqchip_add(...) \ -( \ - ({ \ - static struct lock_class_key _key; \ - _gpiochip_irqchip_add(__VA_ARGS__, false, &_key); \ - }) \ -) -#else -#define gpiochip_irqchip_add(...) \ - _gpiochip_irqchip_add(__VA_ARGS__, false, NULL) -#endif +#endif /* CONFIG_LOCKDEP */ #endif /* CONFIG_GPIOLIB_IRQCHIP */ -- cgit v1.2.3 From f95bd041203b9f27a14e6ab733b9598049d7ffef Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 13 Jan 2017 11:00:25 +0100 Subject: memory: aemif: allow passing device lookup table as platform data TI aemif driver creates its own subnodes of the device tree in order to guarantee that all child devices are probed after the AEMIF timing parameters are configured. Some devices (e.g. da850) use struct of_dev_auxdata for clock lookup but nodes created from within the aemif driver can't access the lookup table. Create a platform data structure that holds a pointer to of_dev_auxdata so that we can use it with of_platform_populate(). Signed-off-by: Bartosz Golaszewski Acked-by: Sekhar Nori Acked-by: Santosh Shilimkar Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/ti-aemif.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/platform_data/ti-aemif.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-aemif.h b/include/linux/platform_data/ti-aemif.h new file mode 100644 index 000000000000..ac72e115093c --- /dev/null +++ b/include/linux/platform_data/ti-aemif.h @@ -0,0 +1,23 @@ +/* + * TI DaVinci AEMIF platform glue. + * + * Copyright (C) 2017 BayLibre SAS + * + * Author: + * Bartosz Golaszewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __TI_DAVINCI_AEMIF_DATA_H__ +#define __TI_DAVINCI_AEMIF_DATA_H__ + +#include + +struct aemif_platform_data { + struct of_dev_auxdata *dev_lookup; +}; + +#endif /* __TI_DAVINCI_AEMIF_DATA_H__ */ -- cgit v1.2.3 From 6d2c5d6c46dd3d9924831efd6c913fdf4d484985 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 16 Jan 2017 17:50:02 +0100 Subject: kmod: make usermodehelper path a const string This is in preparation for making it so that usermode helper programs can't be changed, if desired, by userspace. We will tackle the mess of cleaning up the write-ability of argv and env later, that's going to take more work, for much less gain... Signed-off-by: Greg Kroah-Hartman --- include/linux/kmod.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index fcfd2bf14d3f..c4e441e00db5 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -56,7 +56,7 @@ struct file; struct subprocess_info { struct work_struct work; struct completion *complete; - char *path; + const char *path; char **argv; char **envp; int wait; @@ -67,10 +67,11 @@ struct subprocess_info { }; extern int -call_usermodehelper(char *path, char **argv, char **envp, int wait); +call_usermodehelper(const char *path, char **argv, char **envp, int wait); extern struct subprocess_info * -call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask, +call_usermodehelper_setup(const char *path, char **argv, char **envp, + gfp_t gfp_mask, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); -- cgit v1.2.3 From 219fb0c1436e4893a290ba270bc0e644d02465a3 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 11 Jan 2017 16:43:37 +0200 Subject: serial: sh-sci: Remove the platform data dma slave rx/tx channel IDs Only SH platforms still use platform data for the sh-sci, and none of them declare DMA channels connected to the SCI. Remove the corresponding platform data fields and simplify the driver accordingly. Signed-off-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 9f2bfd055742..1a894c47bfc0 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -71,9 +71,6 @@ struct plat_sci_port { unsigned char regtype; struct plat_sci_port_ops *ops; - - unsigned int dma_slave_tx; - unsigned int dma_slave_rx; }; #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From d5cb1319a91d4f1328b1c70b82c5899acd96af85 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 11 Jan 2017 16:43:38 +0200 Subject: serial: sh-sci: Remove manual break debouncing The sh-sci driver implements manual break debouncing for a few SH platforms by reading the value of the RX pin port register. This feature is optional and the driver considers all negative or zero values of the platform data port_reg field as invalid. As the four platforms that set the field to a register address all use an address higher than 0x7fffffff, the driver will always consider the value as invalid and never perform debouncing. The feature is unused, remove it. Debouncing could be implemented properly in the future using the pinctrl and GPIO APIs if desired. Signed-off-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 1a894c47bfc0..b4419931bf4c 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -9,8 +9,6 @@ * Generic header for SuperH (H)SCI(F) (used by sh/sh64 and related parts) */ -#define SCIx_NOT_SUPPORTED (-1) - /* Serial Control Register (@ = not supported by all parts) */ #define SCSCR_TIE BIT(7) /* Transmit Interrupt Enable */ #define SCSCR_RIE BIT(6) /* Receive Interrupt Enable */ @@ -41,8 +39,6 @@ enum { SCIx_NR_REGTYPES, }; -struct device; - struct plat_sci_port_ops { void (*init_pins)(struct uart_port *, unsigned int cflag); }; @@ -66,7 +62,6 @@ struct plat_sci_port { /* * Platform overrides if necessary, defaults otherwise. */ - int port_reg; unsigned char regshift; unsigned char regtype; -- cgit v1.2.3 From 97ed9790c514066bfae67f22e084b505ed5af436 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 11 Jan 2017 16:43:39 +0200 Subject: serial: sh-sci: Remove unused platform data capabilities field The field isn't set by any platform but is only used internally in the driver to hold data parsed from DT. Move it to the sci_port structure. Signed-off-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index b4419931bf4c..f9a4526f4ec5 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -43,18 +43,12 @@ struct plat_sci_port_ops { void (*init_pins)(struct uart_port *, unsigned int cflag); }; -/* - * Port-specific capabilities - */ -#define SCIx_HAVE_RTSCTS BIT(0) - /* * Platform device specific platform_data struct */ struct plat_sci_port { unsigned int type; /* SCI / SCIF / IRDA / HSCIF */ upf_t flags; /* UPF_* flags */ - unsigned long capabilities; /* Port features/capabilities */ unsigned int sampling_rate; unsigned int scscr; /* SCSCR initialization */ -- cgit v1.2.3 From dfc80387aefb78161f83732804c6d01c89c24595 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 11 Jan 2017 16:43:40 +0200 Subject: serial: sh-sci: Compute the regshift value for SCI ports SCI instances found in SH SoCs have different spacing between registers depending on the SoC. The platform data contains a regshift field that tells the driver by how many bits to shift the register offset to compute its address. We can compute the regshift value automatically based on the memory resource size, there's no need to pass the value through platform data. Fix the sh7750 SCI and sh7760 SIM port memory resources length to ensure proper computation of the regshift value. Signed-off-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index f9a4526f4ec5..e598eaef3962 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -56,7 +56,6 @@ struct plat_sci_port { /* * Platform overrides if necessary, defaults otherwise. */ - unsigned char regshift; unsigned char regtype; struct plat_sci_port_ops *ops; -- cgit v1.2.3 From 9ed90d20449b01beb71a4e125d291a36c80c4ad4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 16 Jan 2017 16:54:28 -0600 Subject: tty: move the non-file related parts of tty_release to new tty_release_struct For in-kernel tty users, we need to be able to create and destroy 'struct tty' that are not associated with a file. The creation side is fine, but tty_release() needs to be split into the file handle portion and the struct tty portion. Introduce a new function, tty_release_struct, to handle just the destroying of a struct tty. Signed-off-by: Rob Herring Reviewed-by: Andy Shevchenko Reviewed-By: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 40144f382516..86c7853282b7 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -528,6 +528,7 @@ extern int tty_alloc_file(struct file *file); extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void tty_free_file(struct file *file); extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx); +extern void tty_release_struct(struct tty_struct *tty, int idx); extern int tty_release(struct inode *inode, struct file *filp); extern void tty_init_termios(struct tty_struct *tty); extern int tty_standard_install(struct tty_driver *driver, -- cgit v1.2.3 From 134e6a034cb004ed5acd3048792de70ced1c6cf5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 19 Jan 2017 08:57:14 -0500 Subject: tracing: Show number of constants profiled in likely profiler Now that constants are traced, it is useful to see the number of constants that are traced in the likely/unlikely profiler in order to know if they should be ignored or not. The likely/unlikely will display a number after the "correct" number if a "constant" count exists. Signed-off-by: Steven Rostedt (VMware) --- include/linux/compiler.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index bbbe1570de1c..a73cc9afa784 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -101,13 +101,18 @@ struct ftrace_branch_data { }; }; +struct ftrace_likely_data { + struct ftrace_branch_data data; + unsigned long constant; +}; + /* * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code * to disable branch tracing on a per file basis. */ #if defined(CONFIG_TRACE_BRANCH_PROFILING) \ && !defined(DISABLE_BRANCH_PROFILING) && !defined(__CHECKER__) -void ftrace_likely_update(struct ftrace_branch_data *f, int val, +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect, int is_constant); #define likely_notrace(x) __builtin_expect(!!(x), 1) @@ -115,13 +120,13 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, #define __branch_check__(x, expect, is_constant) ({ \ int ______r; \ - static struct ftrace_branch_data \ + static struct ftrace_likely_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_annotated_branch"))) \ ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ + .data.func = __func__, \ + .data.file = __FILE__, \ + .data.line = __LINE__, \ }; \ ______r = __builtin_expect(!!(x), expect); \ ftrace_likely_update(&______f, ______r, \ -- cgit v1.2.3 From 579b2a65d245c093d3e63845c320b9321f112b75 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Fri, 6 Jan 2017 18:58:08 +0530 Subject: iommu: add IOMMU_PRIV attribute Add the IOMMU_PRIV attribute, which is used to indicate privileged mappings. Reviewed-by: Robin Murphy Tested-by: Robin Murphy Signed-off-by: Mitchel Humpherys Acked-by: Will Deacon Signed-off-by: Will Deacon --- include/linux/iommu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0ff5111f6959..69e2417a2965 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -31,6 +31,13 @@ #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ #define IOMMU_NOEXEC (1 << 3) #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ +/* + * This is to make the IOMMU API setup privileged + * mapppings accessible by the master only at higher + * privileged execution level and inaccessible at + * less privileged levels. + */ +#define IOMMU_PRIV (1 << 5) struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From b2fb366425ceb85dca56afa538257ec5a2c4f6d1 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Fri, 6 Jan 2017 18:58:11 +0530 Subject: common: DMA-mapping: add DMA_ATTR_PRIVILEGED attribute This patch adds the DMA_ATTR_PRIVILEGED attribute to the DMA-mapping subsystem. Some advanced peripherals such as remote processors and GPUs perform accesses to DMA buffers in both privileged "supervisor" and unprivileged "user" modes. This attribute is used to indicate to the DMA-mapping subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). Cc: linux-doc@vger.kernel.org Reviewed-by: Robin Murphy Tested-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- include/linux/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 10c5a17b1f51..c24721a33b4c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -62,6 +62,13 @@ */ #define DMA_ATTR_NO_WARN (1UL << 8) +/* + * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully + * accessible at an elevated privilege level (and ideally inaccessible or + * at least read-only at lesser-privileged levels). + */ +#define DMA_ATTR_PRIVILEGED (1UL << 9) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. * It can be given to a device to use as a DMA source or target. A CPU cannot -- cgit v1.2.3 From 737c85ca1c3af4f97acb61cd53415ec039b31111 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Fri, 6 Jan 2017 18:58:12 +0530 Subject: arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED The newly added DMA_ATTR_PRIVILEGED is useful for creating mappings that are only accessible to privileged DMA engines. Implement it in dma-iommu.c so that the ARM64 DMA IOMMU mapper can make use of it. Reviewed-by: Robin Murphy Tested-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 7f7e9a7e3839..c5511e1d5560 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -34,7 +34,8 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size, struct device *dev); /* General helpers for DMA-API <-> IOMMU-API interaction */ -int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); +int dma_info_to_prot(enum dma_data_direction dir, bool coherent, + unsigned long attrs); /* * These implement the bulk of the relevant DMA mapping callbacks, but require -- cgit v1.2.3 From c92d781f1a5ea19708b1e1e2b85a3fbd4a738b30 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 16 Jan 2017 16:54:31 -0600 Subject: tty: constify tty_ldisc_receive_buf buffer pointer This is needed to work with the client operations which uses const ptrs. Really, the flags pointer could be const, too, but this would be a tree wide fix. Signed-off-by: Rob Herring Reviewed-by: Andy Shevchenko Reviewed-By: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 86c7853282b7..21c0fabfed60 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -657,7 +657,7 @@ extern int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty); extern void tty_ldisc_release(struct tty_struct *tty); extern void tty_ldisc_init(struct tty_struct *tty); extern void tty_ldisc_deinit(struct tty_struct *tty); -extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, unsigned char *p, +extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, char *f, int count); /* n_tty.c */ -- cgit v1.2.3 From 4567d686f5c6d955e57a3afa1741944c1e7f4033 Mon Sep 17 00:00:00 2001 From: Volodymyr Bendiuga Date: Thu, 19 Jan 2017 17:05:04 +0100 Subject: phy: increase size of MII_BUS_ID_SIZE and bus_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some bus names are pretty long and do not fit into 17 chars. Increase therefore MII_BUS_ID_SIZE and phy_fixup.bus_id to larger number. Now mii_bus.id can host larger name. Signed-off-by: Volodymyr Bendiuga Signed-off-by: Magnus Öberg Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index f7d95f644eed..5c9d2529685f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -158,11 +158,7 @@ static inline const char *phy_modes(phy_interface_t interface) /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ #define PHY_ID_FMT "%s:%02x" -/* - * Need to be a little smaller than phydev->dev.bus_id to leave room - * for the ":%02x" - */ -#define MII_BUS_ID_SIZE (20 - 3) +#define MII_BUS_ID_SIZE 61 /* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. */ @@ -632,7 +628,7 @@ struct phy_driver { /* A Structure for boards to register fixups with the PHY Lib */ struct phy_fixup { struct list_head list; - char bus_id[20]; + char bus_id[MII_BUS_ID_SIZE + 3]; u32 phy_uid; u32 phy_uid_mask; int (*run)(struct phy_device *phydev); -- cgit v1.2.3 From f9a1ef720e9e32bc6a4a382c15ac77d62749c79e Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Mon, 10 Oct 2016 16:05:53 +0300 Subject: net/mlx5: Add MTPPS and MTPPSE registers infrastructure Implement query and set functionality for MTPPS and MTPPSE registers. MTPPS (Management Pulse Per Second) provides the device PPS capabilities, configures the PPS in and out modules and holds the PPS in time stamp. Query MTPPS is supported only when HCA_CAP.pps is set and modify is supported when HCA_CAP.pps_modify is set. MTPPSE (Management Pulse Per Second Event) configures the different event generation modes for PPS. Supported when HCA_CAP.pps is set. Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 18 +++++++++++++ include/linux/mlx5/driver.h | 3 +++ include/linux/mlx5/mlx5_ifc.h | 60 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 80 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 7c5265db02a9..6ac8eb5a89ca 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -289,6 +289,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16, MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + MLX5_EVENT_TYPE_PPS_EVENT = 0x25, MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, MLX5_EVENT_TYPE_STALL_EVENT = 0x1b, @@ -569,6 +570,22 @@ struct mlx5_eqe_port_module { u8 error_type; } __packed; +struct mlx5_eqe_pps { + u8 rsvd0[3]; + u8 pin; + u8 rsvd1[4]; + union { + struct { + __be32 time_sec; + __be32 time_nsec; + }; + struct { + __be64 time_stamp; + }; + }; + u8 rsvd2[12]; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -583,6 +600,7 @@ union ev_data { struct mlx5_eqe_page_fault page_fault; struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_port_module port_module; + struct mlx5_eqe_pps pps; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3a309f6a4a15..ebbc8834063b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -125,6 +125,8 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, + MLX5_REG_MTPPS = 0x9053, + MLX5_REG_MTPPSE = 0x9054, }; enum mlx5_dcbx_oper_mode { @@ -172,6 +174,7 @@ enum mlx5_dev_event { MLX5_DEV_EVENT_PKEY_CHANGE, MLX5_DEV_EVENT_GUID_CHANGE, MLX5_DEV_EVENT_CLIENT_REREG, + MLX5_DEV_EVENT_PPS, }; enum mlx5_port_status { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 37327f6ba9cb..f5292f3b0301 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -835,7 +835,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 port_type[0x2]; u8 num_ports[0x8]; - u8 reserved_at_1c0[0x3]; + u8 reserved_at_1c0[0x1]; + u8 pps[0x1]; + u8 pps_modify[0x1]; u8 log_max_msg[0x5]; u8 reserved_at_1c8[0x4]; u8 max_tc[0x4]; @@ -7821,6 +7823,60 @@ struct mlx5_ifc_initial_seg_bits { u8 reserved_at_80a0[0x17fc0]; }; +struct mlx5_ifc_mtpps_reg_bits { + u8 reserved_at_0[0xc]; + u8 cap_number_of_pps_pins[0x4]; + u8 reserved_at_10[0x4]; + u8 cap_max_num_of_pps_in_pins[0x4]; + u8 reserved_at_18[0x4]; + u8 cap_max_num_of_pps_out_pins[0x4]; + + u8 reserved_at_20[0x24]; + u8 cap_pin_3_mode[0x4]; + u8 reserved_at_48[0x4]; + u8 cap_pin_2_mode[0x4]; + u8 reserved_at_50[0x4]; + u8 cap_pin_1_mode[0x4]; + u8 reserved_at_58[0x4]; + u8 cap_pin_0_mode[0x4]; + + u8 reserved_at_60[0x4]; + u8 cap_pin_7_mode[0x4]; + u8 reserved_at_68[0x4]; + u8 cap_pin_6_mode[0x4]; + u8 reserved_at_70[0x4]; + u8 cap_pin_5_mode[0x4]; + u8 reserved_at_78[0x4]; + u8 cap_pin_4_mode[0x4]; + + u8 reserved_at_80[0x80]; + + u8 enable[0x1]; + u8 reserved_at_101[0xb]; + u8 pattern[0x4]; + u8 reserved_at_110[0x4]; + u8 pin_mode[0x4]; + u8 pin[0x8]; + + u8 reserved_at_120[0x20]; + + u8 time_stamp[0x40]; + + u8 out_pulse_duration[0x10]; + u8 out_periodic_adjustment[0x10]; + + u8 reserved_at_1a0[0x60]; +}; + +struct mlx5_ifc_mtppse_reg_bits { + u8 reserved_at_0[0x18]; + u8 pin[0x8]; + u8 event_arm[0x1]; + u8 reserved_at_21[0x1b]; + u8 event_generation_mode[0x4]; + u8 reserved_at_40[0x40]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -7865,6 +7921,8 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pvlc_reg_bits pvlc_reg; struct mlx5_ifc_slrg_reg_bits slrg_reg; struct mlx5_ifc_sltp_reg_bits sltp_reg; + struct mlx5_ifc_mtpps_reg_bits mtpps_reg; + struct mlx5_ifc_mtppse_reg_bits mtppse_reg; u8 reserved_at_0[0x60e0]; }; -- cgit v1.2.3 From 105433659d394b70dc3b6ceb65d0c1673e14cee1 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Sun, 9 Oct 2016 16:25:43 +0300 Subject: net/mlx5: Add support to s-tag in mlx5 firmware interface Add svlan_tag and rename vlan_tag to cvlan_tag in flow table entry match param. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f5292f3b0301..6f19e4b8574f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -365,8 +365,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 ip_protocol[0x8]; u8 ip_dscp[0x6]; u8 ip_ecn[0x2]; - u8 vlan_tag[0x1]; - u8 reserved_at_91[0x1]; + u8 cvlan_tag[0x1]; + u8 svlan_tag[0x1]; u8 frag[0x1]; u8 reserved_at_93[0x4]; u8 tcp_flags[0x9]; @@ -398,9 +398,11 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 inner_second_cfi[0x1]; u8 inner_second_vid[0xc]; - u8 outer_second_vlan_tag[0x1]; - u8 inner_second_vlan_tag[0x1]; - u8 reserved_at_62[0xe]; + u8 outer_second_cvlan_tag[0x1]; + u8 inner_second_cvlan_tag[0x1]; + u8 outer_second_svlan_tag[0x1]; + u8 inner_second_svlan_tag[0x1]; + u8 reserved_at_64[0xc]; u8 gre_protocol[0x10]; u8 gre_key_h[0x18]; -- cgit v1.2.3 From cfdcbceaeffc669b70d904d80a2df9c86c232566 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 8 Dec 2016 15:52:00 +0200 Subject: net/mlx5: Expose PCAM, MCAM registers infrastructure PCAM: Ports capabilities mask register. MCAM: Management capabilities mask register. PCAM and MCAM registers will provide information regarding firmware support for different features, in order to avoid cases where new driver combined with old firmware results in syndromes (for ex. PCIe counters before this patchset). Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 16 ++++++++++++ include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 60 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6ac8eb5a89ca..79f38e67fe2f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -970,6 +970,22 @@ enum mlx5_cap_type { MLX5_CAP_NUM }; +enum mlx5_pcam_reg_groups { + MLX5_PCAM_REGS_5000_TO_507F = 0x0, +}; + +enum mlx5_pcam_feature_groups { + MLX5_PCAM_FEATURE_ENHANCED_FEATURES = 0x0, +}; + +enum mlx5_mcam_reg_groups { + MLX5_MCAM_REGS_FIRST_128 = 0x0, +}; + +enum mlx5_mcam_feature_groups { + MLX5_MCAM_FEATURE_ENHANCED_FEATURES = 0x0, +}; + /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ebbc8834063b..60c2b156da8c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -121,12 +121,14 @@ enum { MLX5_REG_PVLC = 0x500f, MLX5_REG_PCMR = 0x5041, MLX5_REG_PMLP = 0x5002, + MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, MLX5_REG_MTPPS = 0x9053, MLX5_REG_MTPPSE = 0x9054, + MLX5_REG_MCAM = 0x907f, }; enum mlx5_dcbx_oper_mode { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f19e4b8574f..e8061a95326a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -826,7 +826,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 nic_flow_table[0x1]; u8 eswitch_flow_table[0x1]; u8 early_vf_enable[0x1]; - u8 reserved_at_1a9[0x2]; + u8 mcam_reg[0x1]; + u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; u8 reserved_at_1b1[0x1]; @@ -7481,6 +7482,63 @@ struct mlx5_ifc_peir_reg_bits { u8 error_type[0x8]; }; +struct mlx5_ifc_pcam_enhanced_features_bits { + u8 reserved_at_0[0x7e]; + + u8 ppcnt_discard_group[0x1]; + u8 ppcnt_statistical_group[0x1]; +}; + +struct mlx5_ifc_pcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + union { + u8 reserved_at_0[0x80]; + } port_access_reg_cap_mask; + + u8 reserved_at_c0[0x80]; + + union { + struct mlx5_ifc_pcam_enhanced_features_bits enhanced_features; + u8 reserved_at_0[0x80]; + } feature_cap_mask; + + u8 reserved_at_1c0[0xc0]; +}; + +struct mlx5_ifc_mcam_enhanced_features_bits { + u8 reserved_at_0[0x7f]; + + u8 pcie_performance_group[0x1]; +}; + +struct mlx5_ifc_mcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + union { + u8 reserved_at_0[0x80]; + } mng_access_reg_cap_mask; + + u8 reserved_at_c0[0x80]; + + union { + struct mlx5_ifc_mcam_enhanced_features_bits enhanced_features; + u8 reserved_at_0[0x80]; + } mng_feature_cap_mask; + + u8 reserved_at_1c0[0x80]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From 71862561f3a62015a11de16d1c306481e8415c08 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 8 Dec 2016 16:03:31 +0200 Subject: net/mlx5: Query and cache PCAM, MCAM registers on initialization On load_one, we now cache our capabilities registers internally, similar to QUERY_HCA_CAP. Capabilities can later be queried using macros introduced in this patch. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 6 ++++++ include/linux/mlx5/driver.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 79f38e67fe2f..f21528abfb74 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1081,6 +1081,12 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_QOS(mdev, cap)\ MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap) +#define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ + MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) + +#define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 60c2b156da8c..69c4661d391e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -739,6 +739,10 @@ struct mlx5_core_dev { struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + struct { + u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; + u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; + } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; enum mlx5_device_state state; -- cgit v1.2.3 From d8dc0508c50f838f8abcf023cd74ca9a0f86b5a8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 27 Sep 2016 17:04:51 +0300 Subject: net/mlx5: Add PPCNT physical layer statistical group infrastructure Add the needed infrastructure for future use of PPCNT physical layer statistical group. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f21528abfb74..5ad1d3ca47dc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1115,6 +1115,7 @@ enum { MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, + MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e8061a95326a..f4860fa719d9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1384,6 +1384,42 @@ struct mlx5_ifc_phys_layer_cntrs_bits { u8 reserved_at_640[0x180]; }; +struct mlx5_ifc_phys_layer_statistical_cntrs_bits { + u8 time_since_last_clear_high[0x20]; + + u8 time_since_last_clear_low[0x20]; + + u8 phy_received_bits_high[0x20]; + + u8 phy_received_bits_low[0x20]; + + u8 phy_symbol_errors_high[0x20]; + + u8 phy_symbol_errors_low[0x20]; + + u8 phy_corrected_bits_high[0x20]; + + u8 phy_corrected_bits_low[0x20]; + + u8 phy_corrected_bits_lane0_high[0x20]; + + u8 phy_corrected_bits_lane0_low[0x20]; + + u8 phy_corrected_bits_lane1_high[0x20]; + + u8 phy_corrected_bits_lane1_low[0x20]; + + u8 phy_corrected_bits_lane2_high[0x20]; + + u8 phy_corrected_bits_lane2_low[0x20]; + + u8 phy_corrected_bits_lane3_high[0x20]; + + u8 phy_corrected_bits_lane3_low[0x20]; + + u8 reserved_at_200[0x5c0]; +}; + struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 symbol_error_counter[0x10]; @@ -2928,6 +2964,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout; struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; + struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; u8 reserved_at_0[0x7c0]; }; -- cgit v1.2.3 From 8ed1a6306dc7892b63be7cdb1e3b1123265f42ff Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 17 Nov 2016 13:46:01 +0200 Subject: net/mlx5: Add MPCNT register infrastructure Add the needed infrastructure for future use of MPCNT register. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 4 ++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 5ad1d3ca47dc..1cf97dce8215 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1119,6 +1119,10 @@ enum { MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, }; +enum { + MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP = 0x0, +}; + static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 69c4661d391e..f4d6d390a9cf 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -126,6 +126,7 @@ enum { MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MCIA = 0x9014, MLX5_REG_MLCR = 0x902b, + MLX5_REG_MPCNT = 0x9051, MLX5_REG_MTPPS = 0x9053, MLX5_REG_MTPPSE = 0x9054, MLX5_REG_MCAM = 0x907f, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f4860fa719d9..d96ebc319d63 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1802,6 +1802,30 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits { u8 reserved_at_4c0[0x300]; }; +struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits { + u8 life_time_counter_high[0x20]; + + u8 life_time_counter_low[0x20]; + + u8 rx_errors[0x20]; + + u8 tx_errors[0x20]; + + u8 l0_to_recovery_eieos[0x20]; + + u8 l0_to_recovery_ts[0x20]; + + u8 l0_to_recovery_framing[0x20]; + + u8 l0_to_recovery_retrain[0x20]; + + u8 crc_error_dllp[0x20]; + + u8 crc_error_tlp[0x20]; + + u8 reserved_at_140[0x680]; +}; + struct mlx5_ifc_cmd_inter_comp_event_bits { u8 command_completion_vector[0x20]; @@ -2968,6 +2992,11 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { u8 reserved_at_0[0x7c0]; }; +union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits { + struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout; + u8 reserved_at_0[0x7c0]; +}; + union mlx5_ifc_event_auto_bits { struct mlx5_ifc_comp_event_bits comp_event; struct mlx5_ifc_dct_events_bits dct_events; @@ -7290,6 +7319,18 @@ struct mlx5_ifc_ppcnt_reg_bits { union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; +struct mlx5_ifc_mpcnt_reg_bits { + u8 reserved_at_0[0x8]; + u8 pcie_index[0x8]; + u8 reserved_at_10[0xa]; + u8 grp[0x6]; + + u8 clr[0x1]; + u8 reserved_at_21[0x1f]; + + union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set; +}; + struct mlx5_ifc_ppad_reg_bits { u8 reserved_at_0[0x3]; u8 single_mac[0x1]; @@ -8006,6 +8047,7 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_pmtu_reg_bits pmtu_reg; struct mlx5_ifc_ppad_reg_bits ppad_reg; struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg; + struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg; struct mlx5_ifc_pplm_reg_bits pplm_reg; struct mlx5_ifc_pplr_reg_bits pplr_reg; struct mlx5_ifc_ppsc_reg_bits ppsc_reg; -- cgit v1.2.3 From 701052c578195e6e02a22647fa6fd1c90c31dafd Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 14 Dec 2016 17:40:41 +0200 Subject: net/mlx5: Move cached hca caps to designated caps struct The caps structure consists of hca caps and port/management caps, all under one roof. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 34 +++++++++++++++++----------------- include/linux/mlx5/driver.h | 4 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1cf97dce8215..7b6cd67a263f 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -988,36 +988,36 @@ enum mlx5_mcam_feature_groups { /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ - MLX5_GET(cmd_hca_cap, mdev->hca_caps_cur[MLX5_CAP_GENERAL], cap) + MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_GEN_MAX(mdev, cap) \ - MLX5_GET(cmd_hca_cap, mdev->hca_caps_max[MLX5_CAP_GENERAL], cap) + MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap) #define MLX5_CAP_ETH(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) + mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ETH_MAX(mdev, cap) \ MLX5_GET(per_protocol_networking_offload_caps,\ - mdev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) + mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap) #define MLX5_CAP_ROCE(mdev, cap) \ - MLX5_GET(roce_cap, mdev->hca_caps_cur[MLX5_CAP_ROCE], cap) + MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ROCE_MAX(mdev, cap) \ - MLX5_GET(roce_cap, mdev->hca_caps_max[MLX5_CAP_ROCE], cap) + MLX5_GET(roce_cap, mdev->caps.hca_max[MLX5_CAP_ROCE], cap) #define MLX5_CAP_ATOMIC(mdev, cap) \ - MLX5_GET(atomic_caps, mdev->hca_caps_cur[MLX5_CAP_ATOMIC], cap) + MLX5_GET(atomic_caps, mdev->caps.hca_cur[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \ - MLX5_GET(atomic_caps, mdev->hca_caps_max[MLX5_CAP_ATOMIC], cap) + MLX5_GET(atomic_caps, mdev->caps.hca_max[MLX5_CAP_ATOMIC], cap) #define MLX5_CAP_FLOWTABLE(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->hca_caps_cur[MLX5_CAP_FLOW_TABLE], cap) + MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ - MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) + MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap) #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) @@ -1039,11 +1039,11 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ - mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ - mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + mdev->caps.hca_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) @@ -1065,21 +1065,21 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ - mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) + mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ESW_MAX(mdev, cap) \ MLX5_GET(e_switch_cap, \ - mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) + mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap) #define MLX5_CAP_ODP(mdev, cap)\ - MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) + MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap) #define MLX5_CAP_VECTOR_CALC(mdev, cap) \ MLX5_GET(vector_calc_cap, \ - mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) + mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap) #define MLX5_CAP_QOS(mdev, cap)\ - MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap) + MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap) #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f4d6d390a9cf..1bc4641734da 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -738,9 +738,9 @@ struct mlx5_core_dev { char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; - u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; - u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; struct { + u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; + u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; } caps; -- cgit v1.2.3 From acb04058de49458010c44bb35b849d45113fd668 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jan 2017 14:36:33 +0100 Subject: sched/clock: Fix hotplug crash Mike reported that he could trigger the WARN_ON_ONCE() in set_sched_clock_stable() using hotplug. This exposed a fundamental problem with the interface, we should never mark the TSC stable if we ever find it to be unstable. Therefore set_sched_clock_stable() is a broken interface. The reason it existed is that not having it is a pain, it means all relevant architecture code needs to call clear_sched_clock_stable() where appropriate. Of the three architectures that select HAVE_UNSTABLE_SCHED_CLOCK ia64 and parisc are trivial in that they never called set_sched_clock_stable(), so add an unconditional call to clear_sched_clock_stable() to them. For x86 the story is a lot more involved, and what this patch tries to do is ensure we preserve the status quo. So even is Cyrix or Transmeta have usable TSC they never called set_sched_clock_stable() so they now get an explicit mark unstable. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9881b024b7d7 ("sched/clock: Delay switching sched_clock to stable") Link: http://lkml.kernel.org/r/20170119133633.GB6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a8daed914eef..69e6852fede1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2547,7 +2547,6 @@ extern void sched_clock_init_late(void); * is reliable after all: */ extern int sched_clock_stable(void); -extern void set_sched_clock_stable(void); extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); -- cgit v1.2.3 From e326ce013a8e851193eb337aafb1aa396c533a61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 20 Jan 2017 03:25:34 +0100 Subject: Revert "PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag" Revert commit 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag) as it caused system suspend (in the default configuration) to fail on Dell XPS13 (9360) with the Kaby Lake processor. Fixes: 08b98d329165 (PM / sleep / ACPI: Use the ACPI_FADT_LOW_POWER_S0 flag) Reported-by: Paul Menzel Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0c729c3c8549..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -194,8 +194,6 @@ struct platform_freeze_ops { }; #ifdef CONFIG_SUSPEND -extern suspend_state_t mem_sleep_default; - /** * suspend_set_ops - set platform dependent suspend operations * @ops: The new suspend operations to set. -- cgit v1.2.3 From 9c829c097f2f1cbebcfff69a7254b1df9852fe4e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 28 Dec 2016 14:56:47 -0800 Subject: of: device: Support loading a module with OF based modalias In the case of ULPI devices, we want to be able to load the driver before registering the device so that we don't get stuck in a loop waiting for the phy module to appear and failing usb controller probe. Currently we request the ulpi module via the ulpi ids, but in the DT case we might need to request it with the OF based modalias instead. Add a common function that allows anyone to request a module with the OF based modalias. Acked-by: Rob Herring Cc: Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen --- include/linux/of_device.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index cc7dd687a89d..e9afbcc8de12 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -37,6 +37,7 @@ extern const void *of_device_get_match_data(const struct device *dev); extern ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len); +extern int of_device_request_module(struct device *dev); extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); @@ -78,6 +79,11 @@ static inline int of_device_get_modalias(struct device *dev, return -ENODEV; } +static inline int of_device_request_module(struct device *dev) +{ + return -ENODEV; +} + static inline int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) { -- cgit v1.2.3 From a89b94b53371bbfa582787c2fa3378000ea4263d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 28 Dec 2016 14:56:51 -0800 Subject: usb: chipidea: Handle extcon events properly We're currently emulating the vbus and id interrupts in the OTGSC read API, but we also need to make sure that if we're handling the events with extcon that we don't enable the interrupts for those events in the hardware. Therefore, properly emulate this register if we're using extcon, but don't enable the interrupts. This allows me to get my cable connect/disconnect working properly without getting spurious interrupts on my device that uses an extcon for these two events. Acked-by: Peter Chen Cc: Greg Kroah-Hartman Cc: "Ivan T. Ivanov" Fixes: 3ecb3e09b042 ("usb: chipidea: Use extcon framework for VBUS and ID detect") Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 5dd75fa47dd8..f9be467d6695 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -14,6 +14,7 @@ struct ci_hdrc; * struct ci_hdrc_cable - structure for external connector cable state tracking * @state: current state of the line * @changed: set to true when extcon event happen + * @enabled: set to true if we've enabled the vbus or id interrupt * @edev: device which generate events * @ci: driver state of the chipidea device * @nb: hold event notification callback @@ -22,6 +23,7 @@ struct ci_hdrc; struct ci_hdrc_cable { bool state; bool changed; + bool enabled; struct extcon_dev *edev; struct ci_hdrc *ci; struct notifier_block nb; -- cgit v1.2.3 From 8feb3680bd0363a8d784fa0d065e0a6cdc9e0cff Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 28 Dec 2016 14:56:52 -0800 Subject: usb: chipidea: Add platform flag for wrapper phy management The ULPI phy on qcom platforms needs to be initialized and powered on after a USB reset and before we toggle the run/stop bit. Otherwise, the phy locks up and doesn't work properly. Therefore, add a flag to skip any phy power management in the core layer, leaving it up to the glue driver to manage. Acked-by: Peter Chen Cc: Greg Kroah-Hartman Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index f9be467d6695..d07b162073f7 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -57,6 +57,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_OVERRIDE_AHB_BURST BIT(9) #define CI_HDRC_OVERRIDE_TX_BURST BIT(10) #define CI_HDRC_OVERRIDE_RX_BURST BIT(11) +#define CI_HDRC_OVERRIDE_PHY_CONTROL BIT(12) /* Glue layer manages phy */ enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 -- cgit v1.2.3 From 5cc49268995a1f063a7a569299393e4cf9d06923 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 20 Jan 2017 15:11:55 +0800 Subject: usb: chipidea: Consolidate extcon notifiers The two extcon notifiers are almost the same except for the variable name for the cable structure and the id notifier inverts the cable->state logic. Make it the same and replace two functions with one to save some lines. This also makes it so that the id cable state is true when the id pin is pulled low, so we change the name of ->state to ->connected to properly reflect that we're interested in the cable being connected. Acked-by: Peter Chen Cc: Greg Kroah-Hartman Cc: "Ivan T. Ivanov" Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index d07b162073f7..7e3daa37cf60 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -12,7 +12,7 @@ struct ci_hdrc; /** * struct ci_hdrc_cable - structure for external connector cable state tracking - * @state: current state of the line + * @connected: true if cable is connected, false otherwise * @changed: set to true when extcon event happen * @enabled: set to true if we've enabled the vbus or id interrupt * @edev: device which generate events @@ -21,7 +21,7 @@ struct ci_hdrc; * @conn: used for notification registration */ struct ci_hdrc_cable { - bool state; + bool connected; bool changed; bool enabled; struct extcon_dev *edev; -- cgit v1.2.3 From 11893dae63da0f5b251cf7f9a24d64c8ff4771ff Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 28 Dec 2016 14:57:06 -0800 Subject: usb: chipidea: msm: Handle phy power states The ULPI phy on qcom platforms needs to be initialized and powered on after a USB reset and before we toggle the run/stop bit. Otherwise, the phy locks up and doesn't work properly. Hook the phy initialization into the RESET event and the phy power off into the STOPPED event. Acked-by: Peter Chen Cc: Greg Kroah-Hartman Signed-off-by: Stephen Boyd Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 7e3daa37cf60..c5fdfcf99828 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -61,7 +61,7 @@ struct ci_hdrc_platform_data { enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 - void (*notify_event) (struct ci_hdrc *ci, unsigned event); + int (*notify_event) (struct ci_hdrc *ci, unsigned event); struct regulator *reg_vbus; struct usb_otg_caps ci_otg_caps; bool tpl_support; -- cgit v1.2.3 From ee48c726d0b014ac8dd5ec803fb63ce0596a42cf Mon Sep 17 00:00:00 2001 From: Ramiro Oliveira Date: Fri, 13 Jan 2017 17:57:40 +0000 Subject: reset: Change shared flag from int to bool Since the new parameter being added is going to be a bool this patch changes the shared flag from int to bool to match the new parameter. Signed-off-by: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 5daff15722d3..ec1d1fd28f5f 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,10 +13,10 @@ int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index, int shared); + const char *id, int index, bool shared); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index, int shared); + const char *id, int index, bool shared); int __must_check device_reset(struct device *dev); @@ -69,14 +69,14 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index, int shared) + const char *id, int index, bool shared) { return ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, - const char *id, int index, int shared) + const char *id, int index, bool shared) { return ERR_PTR(-ENOTSUPP); } @@ -132,19 +132,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true); } static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false); } static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true); } /** @@ -185,7 +185,7 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, 1); + return __of_reset_control_get(node, id, 0, true); } /** @@ -202,7 +202,7 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, 0); + return __of_reset_control_get(node, NULL, index, false); } /** @@ -230,7 +230,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, 1); + return __of_reset_control_get(node, NULL, index, true); } /** @@ -252,7 +252,7 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __devm_reset_control_get(dev, id, 0, 0); + return __devm_reset_control_get(dev, id, 0, false); } /** @@ -267,19 +267,19 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, static inline struct reset_control *devm_reset_control_get_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, 1); + return __devm_reset_control_get(dev, id, 0, true); } static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, 0); + return __devm_reset_control_get(dev, id, 0, false); } static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, 1); + return __devm_reset_control_get(dev, id, 0, true); } /** @@ -297,7 +297,7 @@ static inline struct reset_control *devm_reset_control_get_optional_shared( static inline struct reset_control * devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, 0); + return __devm_reset_control_get(dev, NULL, index, false); } /** @@ -313,7 +313,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) static inline struct reset_control * devm_reset_control_get_shared_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, 1); + return __devm_reset_control_get(dev, NULL, index, true); } /* -- cgit v1.2.3 From bb475230b8e59a547ab66ac3b02572df21a580e9 Mon Sep 17 00:00:00 2001 From: Ramiro Oliveira Date: Fri, 13 Jan 2017 17:57:41 +0000 Subject: reset: make optional functions really optional The *_get_optional_* functions weren't really optional so this patch makes them really optional. These *_get_optional_* functions will now return NULL instead of an error if no matching reset phandle is found in the DT, and all the reset_control_* functions now accept NULL rstc pointers. Signed-off-by: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index ec1d1fd28f5f..86b4ed75359e 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -13,10 +13,12 @@ int reset_control_deassert(struct reset_control *rstc); int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index, bool shared); + const char *id, int index, bool shared, + bool optional); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index, bool shared); + const char *id, int index, bool shared, + bool optional); int __must_check device_reset(struct device *dev); @@ -69,14 +71,15 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index, bool shared) + const char *id, int index, bool shared, + bool optional) { return ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( - struct device *dev, - const char *id, int index, bool shared) + struct device *dev, const char *id, + int index, bool shared, bool optional) { return ERR_PTR(-ENOTSUPP); } @@ -104,7 +107,8 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, + false); } /** @@ -132,19 +136,22 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, + false); } static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, + true); } static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true); + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, + true); } /** @@ -160,7 +167,7 @@ static inline struct reset_control *reset_control_get_optional_shared( static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, 0); + return __of_reset_control_get(node, id, 0, false, false); } /** @@ -185,7 +192,7 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, true); + return __of_reset_control_get(node, id, 0, true, false); } /** @@ -202,7 +209,7 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, false); + return __of_reset_control_get(node, NULL, index, false, false); } /** @@ -230,7 +237,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, true); + return __of_reset_control_get(node, NULL, index, true, false); } /** @@ -252,7 +259,7 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __devm_reset_control_get(dev, id, 0, false); + return __devm_reset_control_get(dev, id, 0, false, false); } /** @@ -267,19 +274,19 @@ __must_check devm_reset_control_get_exclusive(struct device *dev, static inline struct reset_control *devm_reset_control_get_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true); + return __devm_reset_control_get(dev, id, 0, true, false); } static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false); + return __devm_reset_control_get(dev, id, 0, false, true); } static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true); + return __devm_reset_control_get(dev, id, 0, true, true); } /** @@ -297,7 +304,7 @@ static inline struct reset_control *devm_reset_control_get_optional_shared( static inline struct reset_control * devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, false); + return __devm_reset_control_get(dev, NULL, index, false, false); } /** @@ -313,7 +320,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) static inline struct reset_control * devm_reset_control_get_shared_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, true); + return __devm_reset_control_get(dev, NULL, index, true, false); } /* -- cgit v1.2.3 From a62a77881b1b6708ffeddd9bf0529494f7b199e3 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 16 Jan 2017 11:17:57 +0100 Subject: brcmfmac: add support for BCM43455 with modalias sdio:c00v02D0dA9BF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCM43455 is a more recent revision of the BCM4345. Some of the BCM43455 got a dedicated SDIO device ID which is currently not supported by brcmfmac. Adding the new sdio_device_id to brcmfmac is enough to get the BCM43455 supported because the chip itself is already supported (due to BCM4345 support in the driver). Signed-off-by: Martin Blumenstingl Acked-by: Arend van Spriel Reviewed-by: Andreas Färber Tested-by: Andreas Färber Signed-off-by: Kalle Valo --- include/linux/mmc/sdio_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index d43ef96bf075..71b113e1223f 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -36,6 +36,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 +#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_DEVICE_ID_BROADCOM_4356 0x4356 -- cgit v1.2.3 From 6391a4481ba0796805d6581e42f9f0418c099e34 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 20 Jan 2017 14:32:42 +0800 Subject: virtio-net: restore VIRTIO_HDR_F_DATA_VALID on receiving Commit 501db511397f ("virtio: don't set VIRTIO_NET_HDR_F_DATA_VALID on xmit") in fact disables VIRTIO_HDR_F_DATA_VALID on receiving path too, fixing this by adding a hint (has_data_valid) and set it only on the receiving path. Cc: Rolf Neugebauer Signed-off-by: Jason Wang Acked-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 56436472ccc7..5209b5ed2a64 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, - bool little_endian) + bool little_endian, + bool has_data_valid) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ @@ -91,6 +92,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, skb_checksum_start_offset(skb)); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); + } else if (has_data_valid && + skb->ip_summed == CHECKSUM_UNNECESSARY) { + hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; -- cgit v1.2.3 From 76640b84bd7a9d68c70d8bc8ecd02cdc4bd8855e Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 19 Jan 2017 14:48:41 +0100 Subject: soc: samsung: pmu: Provide global function to get PMU regmap PMU is something like a SoC wide service, so add a helper function to get PMU regmap. This will be used by other Exynos device drivers. This way it can be avoided to model this dependency in device tree (as phandles to PMU node) for almost every device in the SoC. Signed-off-by: Marek Szyprowski Reviewed-by: Tomasz Figa Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-pmu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index e2e9de1acc5b..e57eb4b6cc5a 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -12,6 +12,8 @@ #ifndef __LINUX_SOC_EXYNOS_PMU_H #define __LINUX_SOC_EXYNOS_PMU_H +struct regmap; + enum sys_powerdown { SYS_AFTR, SYS_LPA, @@ -20,5 +22,13 @@ enum sys_powerdown { }; extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); +#ifdef CONFIG_EXYNOS_PMU +extern struct regmap *exynos_get_pmu_regmap(void); +#else +static inline struct regmap *exynos_get_pmu_regmap(void) +{ + return ERR_PTR(-ENODEV); +} +#endif #endif /* __LINUX_SOC_EXYNOS_PMU_H */ -- cgit v1.2.3 From 582a686f52d2e002da64093fe4b9aa5befcaf4e4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 18 Jan 2017 14:04:37 +0100 Subject: device: bus_type: Introduce num_vf callback This allows for bus types to implement their own method of retrieving the number of virtual functions a NIC on that type of bus supports. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 491b4c0ca633..6d73b70a4a5d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -88,6 +88,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * * @suspend: Called when a device on this bus wants to go to sleep mode. * @resume: Called to bring a device on this bus out of sleep mode. + * @num_vf: Called to find out how many virtual functions a device on this + * bus supports. * @pm: Power management operations of this bus, callback the specific * device driver's pm-ops. * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU @@ -127,6 +129,8 @@ struct bus_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + int (*num_vf)(struct device *dev); + const struct dev_pm_ops *pm; const struct iommu_ops *iommu_ops; -- cgit v1.2.3 From 9af15c38254d81c9991eba89335ca7c537d7f2c3 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 18 Jan 2017 14:04:39 +0100 Subject: device: Implement a bus agnostic dev_num_vf routine Now that pci_bus_type has num_vf callback set, dev_num_vf can be implemented in a bus type independent way and the check for whether a PCI device is being handled in rtnetlink can be dropped. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- include/linux/device.h | 7 +++++++ include/linux/pci.h | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6d73b70a4a5d..bd684fc8ec1d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1144,6 +1144,13 @@ extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); extern void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); +static inline int dev_num_vf(struct device *dev) +{ + if (dev->bus && dev->bus->num_vf) + return dev->bus->num_vf(dev); + return 0; +} + /* * Root device objects for grouping under /sys/devices */ diff --git a/include/linux/pci.h b/include/linux/pci.h index e2d1a124216a..adbc859fe7c4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -885,7 +885,6 @@ void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type); void pci_sort_breadthfirst(void); #define dev_is_pci(d) ((d)->bus == &pci_bus_type) #define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false)) -#define dev_num_vf(d) ((dev_is_pci(d) ? pci_num_vf(to_pci_dev(d)) : 0)) /* Generic PCI functions exported to card drivers */ @@ -1630,7 +1629,6 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) -#define dev_num_vf(d) (0) #endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ -- cgit v1.2.3 From 9f8197980d87a28ec3d0b3b986f770e7e7878485 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jan 2017 10:59:13 +0000 Subject: delay: Add explanation of udelay() inaccuracy There seems to be some misunderstanding that udelay() and friends will always guarantee the specified delay. This is a false understanding. When udelay() is based on CPU cycles, it can return early for many reasons which are detailed by Linus' reply to me in a thread in 2011: http://lists.openwall.net/linux-kernel/2011/01/12/372 However, a udelay test module was created in 2014 which allows udelay() to only be 0.5% fast, which is outside of the CPU-cycles udelay() results I measured back in 2011, which were deemed to be in the "we don't care" region. test_udelay() should be fixed to reflect the real allowable tolerance on udelay(), rather than 0.5%. Cc: David Riley Cc: John Stultz Signed-off-by: Russell King Signed-off-by: John Stultz --- include/linux/delay.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index a6ecb34cf547..2ecb3c46b20a 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -5,6 +5,17 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. + * + * Please note that ndelay(), udelay() and mdelay() may return early for + * several reasons: + * 1. computed loops_per_jiffy too low (due to the time taken to + * execute the timer interrupt.) + * 2. cache behaviour affecting the time it takes to execute the + * loop function. + * 3. CPU clock rate changes. + * + * Please see this thread: + * http://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include -- cgit v1.2.3 From bcc9a76d5ac426bc45c9e863b1830347827ca77a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sat, 21 Jan 2017 21:33:35 -0500 Subject: locking/rwsem: Reinit wake_q after use In __rwsem_down_write_failed_common(), the same wake_q variable name is defined twice, with the inner wake_q hiding the one in outer scope. We can either use different names for the two wake_q's. Even better, we can use the same wake_q twice, if necessary. To enable the latter change, we need to define a new helper function wake_q_init() to enable reinitalization of wake_q after use. Signed-off-by: Waiman Long Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1485052415-9611-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f4f9d32f12b3..4e62b378bd65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,12 @@ struct wake_q_head { #define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -- cgit v1.2.3 From 582d0ac397ca02a6a3fb653b13154ac87b884beb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Jan 2017 12:36:33 -0800 Subject: net: phy: bcm7xxx: Add entry for BCM7278 Add support for the BCM7278 28nm process Gigabit Ethernet PHY. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 4f7d8be9ddbf..295fb3e73de5 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -24,6 +24,7 @@ #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7278 0xae0251a0 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7346 0x600d8650 -- cgit v1.2.3 From fdbe574eb69312a7fbe09674d69c01b80e4ed9dc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 19 Jan 2017 20:57:46 +0000 Subject: iommu/dma: Allow MSI-only cookies IOMMU domain users such as VFIO face a similar problem to DMA API ops with regard to mapping MSI messages in systems where the MSI write is subject to IOMMU translation. With the relevant infrastructure now in place for managed DMA domains, it's actually really simple for other users to piggyback off that and reap the benefits without giving up their own IOVA management, and without having to reinvent their own wheel in the MSI layer. Allow such users to opt into automatic MSI remapping by dedicating a region of their IOVA space to a managed cookie, and extend the mapping routine to implement a trivial linear allocator in such cases, to avoid the needless overhead of a full-blown IOVA domain. Signed-off-by: Robin Murphy Reviewed-by: Tomasz Nowicki Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/dma-iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 7f7e9a7e3839..28df844a23b6 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -27,6 +27,7 @@ int iommu_dma_init(void); /* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct iommu_domain *domain); +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); /* Setup call for arch DMA mapping code */ @@ -86,6 +87,11 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain) return -ENODEV; } +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } -- cgit v1.2.3 From e5b5234a36ca283158721d3d2e0cddfa324abdf9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:47 +0000 Subject: iommu: Rename iommu_dm_regions into iommu_resv_regions We want to extend the callbacks used for dm regions and use them for reserved regions. Reserved regions can be - directly mapped regions - regions that cannot be iommu mapped (PCI host bridge windows, ...) - MSI regions (because they belong to another address space or because they are not translated by the IOMMU and need special handling) So let's rename the struct and also the callbacks. Signed-off-by: Eric Auger Acked-by: Robin Murphy Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/iommu.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0ff5111f6959..bfecb8b74078 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -118,13 +118,13 @@ enum iommu_attr { }; /** - * struct iommu_dm_region - descriptor for a direct mapped memory region + * struct iommu_resv_region - descriptor for a reserved memory region * @list: Linked list pointers * @start: System physical start address of the region * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) */ -struct iommu_dm_region { +struct iommu_resv_region { struct list_head list; phys_addr_t start; size_t length; @@ -150,9 +150,9 @@ struct iommu_dm_region { * @device_group: find iommu group for a particular device * @domain_get_attr: Query domain attributes * @domain_set_attr: Change domain attributes - * @get_dm_regions: Request list of direct mapping requirements for a device - * @put_dm_regions: Free list of direct mapping requirements for a device - * @apply_dm_region: Temporary helper call-back for iova reserved ranges + * @get_resv_regions: Request list of reserved regions for a device + * @put_resv_regions: Free list of reserved regions for a device + * @apply_resv_region: Temporary helper call-back for iova reserved ranges * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain * @domain_set_windows: Set the number of windows for a domain @@ -184,11 +184,12 @@ struct iommu_ops { int (*domain_set_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); - /* Request/Free a list of direct mapping requirements for a device */ - void (*get_dm_regions)(struct device *dev, struct list_head *list); - void (*put_dm_regions)(struct device *dev, struct list_head *list); - void (*apply_dm_region)(struct device *dev, struct iommu_domain *domain, - struct iommu_dm_region *region); + /* Request/Free a list of reserved regions for a device */ + void (*get_resv_regions)(struct device *dev, struct list_head *list); + void (*put_resv_regions)(struct device *dev, struct list_head *list); + void (*apply_resv_region)(struct device *dev, + struct iommu_domain *domain, + struct iommu_resv_region *region); /* Window handling functions */ int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr, @@ -233,8 +234,8 @@ extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t io extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); -extern void iommu_get_dm_regions(struct device *dev, struct list_head *list); -extern void iommu_put_dm_regions(struct device *dev, struct list_head *list); +extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); +extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern int iommu_attach_group(struct iommu_domain *domain, @@ -443,12 +444,12 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain, { } -static inline void iommu_get_dm_regions(struct device *dev, +static inline void iommu_get_resv_regions(struct device *dev, struct list_head *list) { } -static inline void iommu_put_dm_regions(struct device *dev, +static inline void iommu_put_resv_regions(struct device *dev, struct list_head *list) { } -- cgit v1.2.3 From d30ddcaa7b028049cdfee3a40248002d07b2bbf3 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:48 +0000 Subject: iommu: Add a new type field in iommu_resv_region We introduce a new field to differentiate the reserved region types and specialize the apply_resv_region implementation. Legacy direct mapped regions have IOMMU_RESV_DIRECT type. We introduce 2 new reserved memory types: - IOMMU_RESV_MSI will characterize MSI regions that are mapped - IOMMU_RESV_RESERVED characterize regions that cannot by mapped. Signed-off-by: Eric Auger Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/iommu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bfecb8b74078..233a6bf093bf 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -117,18 +117,25 @@ enum iommu_attr { DOMAIN_ATTR_MAX, }; +/* These are the possible reserved region types */ +#define IOMMU_RESV_DIRECT (1 << 0) +#define IOMMU_RESV_RESERVED (1 << 1) +#define IOMMU_RESV_MSI (1 << 2) + /** * struct iommu_resv_region - descriptor for a reserved memory region * @list: Linked list pointers * @start: System physical start address of the region * @length: Length of the region in bytes * @prot: IOMMU Protection flags (READ/WRITE/...) + * @type: Type of the reserved region */ struct iommu_resv_region { struct list_head list; phys_addr_t start; size_t length; int prot; + int type; }; #ifdef CONFIG_IOMMU_API -- cgit v1.2.3 From 2b20cbba3390a55c511acba2f0f517dd27a528b2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:49 +0000 Subject: iommu: iommu_alloc_resv_region Introduce a new helper serving the purpose to allocate a reserved region. This will be used in iommu driver implementing reserved region callbacks. Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 233a6bf093bf..f6bb55d3e606 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -244,6 +244,8 @@ extern void iommu_set_fault_handler(struct iommu_domain *domain, extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); +extern struct iommu_resv_region * +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); -- cgit v1.2.3 From 6c65fb318e8bbf21e939e651028b955324f1d873 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:51 +0000 Subject: iommu: iommu_get_group_resv_regions Introduce iommu_get_group_resv_regions whose role consists in enumerating all devices from the group and collecting their reserved regions. The list is sorted and overlaps between regions of the same type are handled by merging the regions. Signed-off-by: Eric Auger Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f6bb55d3e606..bec3730dc009 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -246,6 +246,8 @@ extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +extern int iommu_get_group_resv_regions(struct iommu_group *group, + struct list_head *head); extern int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group); @@ -463,6 +465,12 @@ static inline void iommu_put_resv_regions(struct device *dev, { } +static inline int iommu_get_group_resv_regions(struct iommu_group *group, + struct list_head *head) +{ + return -ENODEV; +} + static inline int iommu_request_dm_for_dev(struct device *dev) { return -ENODEV; -- cgit v1.2.3 From d0f949e220fdf5ed1033e9f6e80749b05f2e7b70 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 20 Jan 2017 10:15:03 +0100 Subject: mfd: Add STM32 Timers driver This hardware block could at used at same time for PWM generation and IIO timers. PWM and IIO timer configuration are mixed in the same registers so we need a multi fonction driver to be able to share those registers. version 7: - rebase on v4.10-rc2 version 6: - rename files to stm32-timers - rename functions to stm32_timers_xxx version 5: - fix Lee comments about detect function - add missing dependency on REGMAP_MMIO version 4: - add a function to detect Auto Reload Register (ARR) size - rename the structure shared with other drivers version 2: - rename driver "stm32-gptimer" to be align with SoC documentation - only keep one compatible - use of_platform_populate() instead of devm_mfd_add_devices() Signed-off-by: Benjamin Gaignard Acked-by: Rob Herring Signed-off-by: Lee Jones --- include/linux/mfd/stm32-timers.h | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/mfd/stm32-timers.h (limited to 'include/linux') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h new file mode 100644 index 000000000000..d0300045f04a --- /dev/null +++ b/include/linux/mfd/stm32-timers.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) STMicroelectronics 2016 + * + * Author: Benjamin Gaignard + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#ifndef _LINUX_STM32_GPTIMER_H_ +#define _LINUX_STM32_GPTIMER_H_ + +#include +#include + +#define TIM_CR1 0x00 /* Control Register 1 */ +#define TIM_CR2 0x04 /* Control Register 2 */ +#define TIM_SMCR 0x08 /* Slave mode control reg */ +#define TIM_DIER 0x0C /* DMA/interrupt register */ +#define TIM_SR 0x10 /* Status register */ +#define TIM_EGR 0x14 /* Event Generation Reg */ +#define TIM_CCMR1 0x18 /* Capt/Comp 1 Mode Reg */ +#define TIM_CCMR2 0x1C /* Capt/Comp 2 Mode Reg */ +#define TIM_CCER 0x20 /* Capt/Comp Enable Reg */ +#define TIM_PSC 0x28 /* Prescaler */ +#define TIM_ARR 0x2c /* Auto-Reload Register */ +#define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ +#define TIM_CCR2 0x38 /* Capt/Comp Register 2 */ +#define TIM_CCR3 0x3C /* Capt/Comp Register 3 */ +#define TIM_CCR4 0x40 /* Capt/Comp Register 4 */ +#define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ + +#define TIM_CR1_CEN BIT(0) /* Counter Enable */ +#define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ +#define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ +#define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ +#define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ +#define TIM_DIER_UIE BIT(0) /* Update interrupt */ +#define TIM_SR_UIF BIT(0) /* Update interrupt flag */ +#define TIM_EGR_UG BIT(0) /* Update Generation */ +#define TIM_CCMR_PE BIT(3) /* Channel Preload Enable */ +#define TIM_CCMR_M1 (BIT(6) | BIT(5)) /* Channel PWM Mode 1 */ +#define TIM_CCER_CC1E BIT(0) /* Capt/Comp 1 out Ena */ +#define TIM_CCER_CC1P BIT(1) /* Capt/Comp 1 Polarity */ +#define TIM_CCER_CC1NE BIT(2) /* Capt/Comp 1N out Ena */ +#define TIM_CCER_CC1NP BIT(3) /* Capt/Comp 1N Polarity */ +#define TIM_CCER_CC2E BIT(4) /* Capt/Comp 2 out Ena */ +#define TIM_CCER_CC3E BIT(8) /* Capt/Comp 3 out Ena */ +#define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ +#define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) +#define TIM_BDTR_BKE BIT(12) /* Break input enable */ +#define TIM_BDTR_BKP BIT(13) /* Break input polarity */ +#define TIM_BDTR_AOE BIT(14) /* Automatic Output Enable */ +#define TIM_BDTR_MOE BIT(15) /* Main Output Enable */ +#define TIM_BDTR_BKF (BIT(16) | BIT(17) | BIT(18) | BIT(19)) +#define TIM_BDTR_BK2F (BIT(20) | BIT(21) | BIT(22) | BIT(23)) +#define TIM_BDTR_BK2E BIT(24) /* Break 2 input enable */ +#define TIM_BDTR_BK2P BIT(25) /* Break 2 input polarity */ + +#define MAX_TIM_PSC 0xFFFF +#define TIM_CR2_MMS_SHIFT 4 +#define TIM_SMCR_TS_SHIFT 4 +#define TIM_BDTR_BKF_MASK 0xF +#define TIM_BDTR_BKF_SHIFT 16 +#define TIM_BDTR_BK2F_SHIFT 20 + +struct stm32_timers { + struct clk *clk; + struct regmap *regmap; + u32 max_arr; +}; +#endif -- cgit v1.2.3 From 631a9639ac413da6242cb15558ebd661cf633622 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:57 +0000 Subject: irqdomain: Add irq domain MSI and MSI_REMAP flags We introduce two new enum values for the irq domain flag: - IRQ_DOMAIN_FLAG_MSI indicates the irq domain corresponds to an MSI domain - IRQ_DOMAIN_FLAG_MSI_REMAP indicates the irq domain has MSI remapping capabilities. Those values will be useful to check all MSI irq domains have MSI remapping support when assessing the safety of IRQ assignment to a guest. irq_domain_hierarchical_is_msi_remap() allows to check if an irq domain or any parent implements MSI remapping. Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/irqdomain.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ffb84604c1de..bc2f5719dace 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -183,6 +183,12 @@ enum { /* Irq domain is an IPI domain with single virq */ IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), + /* Irq domain implements MSIs */ + IRQ_DOMAIN_FLAG_MSI = (1 << 4), + + /* Irq domain implements MSI remapping */ + IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -446,6 +452,19 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; } + +static inline bool irq_domain_is_msi(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI; +} + +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP; +} + +extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline void irq_domain_activate_irq(struct irq_data *data) { } static inline void irq_domain_deactivate_irq(struct irq_data *data) { } @@ -477,6 +496,22 @@ static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_msi(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) +{ + return false; +} + +static inline bool +irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) +{ + return false; +} #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ -- cgit v1.2.3 From c7b41f0af38f53e46050b56a5b0e96710097b83c Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 19 Jan 2017 20:57:59 +0000 Subject: irqdomain: irq_domain_check_msi_remap This new function checks whether all MSI irq domains implement IRQ remapping. This is useful to understand whether VFIO passthrough is safe with respect to interrupts. On ARM typically an MSI controller can sit downstream to the IOMMU without preventing VFIO passthrough. As such any assigned device can write into the MSI doorbell. In case the MSI controller implements IRQ remapping, assigned devices will not be able to trigger interrupts towards the host. On the contrary, the assignment must be emphasized as unsafe with respect to interrupts. Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Bharat Bhushan Signed-off-by: Will Deacon --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bc2f5719dace..188eced6813e 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -222,6 +222,7 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); +extern bool irq_domain_check_msi_remap(void); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, -- cgit v1.2.3 From d78973c32a210b0057b51880f7119bf2b61d5a65 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 12 Dec 2016 18:01:45 -0800 Subject: llist: Clarify comments about when locking is needed llist.h comments are confusing about when locking is needed versus when it isn't. Clarify these comments by being more descriptive about why locking is needed for llist_del_first. Cc: Ingo Molnar Cc: Will Deacon Cc: Paul McKenney Acked-by: Huang Ying Acked-by: Mathieu Desnoyers Signed-off-by: Joel Fernandes Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/llist.h | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index fd4ca0b4fe0f..171baa90f6f6 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -3,28 +3,33 @@ /* * Lock-less NULL terminated single linked list * - * If there are multiple producers and multiple consumers, llist_add - * can be used in producers and llist_del_all can be used in - * consumers. They can work simultaneously without lock. But - * llist_del_first can not be used here. Because llist_del_first - * depends on list->first->next does not changed if list->first is not - * changed during its operation, but llist_del_first, llist_add, - * llist_add (or llist_del_all, llist_add, llist_add) sequence in - * another consumer may violate that. - * - * If there are multiple producers and one consumer, llist_add can be - * used in producers and llist_del_all or llist_del_first can be used - * in the consumer. - * - * This can be summarized as follow: + * Cases where locking is not needed: + * If there are multiple producers and multiple consumers, llist_add can be + * used in producers and llist_del_all can be used in consumers simultaneously + * without locking. Also a single consumer can use llist_del_first while + * multiple producers simultaneously use llist_add, without any locking. + * + * Cases where locking is needed: + * If we have multiple consumers with llist_del_first used in one consumer, and + * llist_del_first or llist_del_all used in other consumers, then a lock is + * needed. This is because llist_del_first depends on list->first->next not + * changing, but without lock protection, there's no way to be sure about that + * if a preemption happens in the middle of the delete operation and on being + * preempted back, the list->first is the same as before causing the cmpxchg in + * llist_del_first to succeed. For example, while a llist_del_first operation + * is in progress in one consumer, then a llist_del_first, llist_add, + * llist_add (or llist_del_all, llist_add, llist_add) sequence in another + * consumer may cause violations. + * + * This can be summarized as follows: * * | add | del_first | del_all * add | - | - | - * del_first | | L | L * del_all | | | - * - * Where "-" stands for no lock is needed, while "L" stands for lock - * is needed. + * Where, a particular row's operation can happen concurrently with a column's + * operation, with "-" being no lock needed, while "L" being lock is needed. * * The list entries deleted via llist_del_all can be traversed with * traversing function such as llist_for_each etc. But the list -- cgit v1.2.3 From 02a5c550b2738f2bfea8e1e00aa75944d71c9e18 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Nov 2016 17:25:06 -0700 Subject: rcu: Abstract extended quiescent state determination This commit is the fourth step towards full abstraction of all accesses to the ->dynticks counter, implementing previously open-coded checks and comparisons in new rcu_dynticks_in_eqs() and rcu_dynticks_in_eqs_since() functions. This abstraction will ease changes to the ->dynticks counter operation. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcutiny.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ac81e4063b40..4f9b2fa2173d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,6 +27,12 @@ #include +struct rcu_dynticks; +static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) +{ + return 0; +} + static inline unsigned long get_state_synchronize_rcu(void) { return 0; -- cgit v1.2.3 From 1cce1eea0aff51201753fcaca421df825b0813b6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 14 Dec 2016 15:56:33 +0200 Subject: inotify: Convert to using per-namespace limits This patchset converts inotify to using the newly introduced per-userns sysctl infrastructure. Currently the inotify instances/watches are being accounted in the user_struct structure. This means that in setups where multiple users in unprivileged containers map to the same underlying real user (i.e. pointing to the same user_struct) the inotify limits are going to be shared as well, allowing one user(or application) to exhaust all others limits. Fix this by switching the inotify sysctls to using the per-namespace/per-user limits. This will allow the server admin to set sensible global limits, which can further be tuned inside every individual user namespace. Additionally, in order to preserve the sysctl ABI make the existing inotify instances/watches sysctls modify the values of the initial user namespace. Signed-off-by: Nikolay Borisov Acked-by: Jan Kara Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/linux/fsnotify_backend.h | 3 ++- include/linux/sched.h | 4 ---- include/linux/user_namespace.h | 4 ++++ 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0cf34d6cc253..c8f2738113f4 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -170,7 +171,7 @@ struct fsnotify_group { struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; - struct user_struct *user; + struct ucounts *ucounts; } inotify_data; #endif #ifdef CONFIG_FANOTIFY diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d1905245c7a..d2334229167f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -868,10 +868,6 @@ struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_INOTIFY_USER - atomic_t inotify_watches; /* How many inotify watches does this user have? */ - atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ -#endif #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index eb209d4523f5..363e0e8082a9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -32,6 +32,10 @@ enum ucount_type { UCOUNT_NET_NAMESPACES, UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, +#ifdef CONFIG_INOTIFY_USER + UCOUNT_INOTIFY_INSTANCES, + UCOUNT_INOTIFY_WATCHES, +#endif UCOUNT_COUNTS, }; -- cgit v1.2.3 From 9227dd2a84a765fcfef1677ff17de0958b192eda Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Jan 2017 17:26:31 +1300 Subject: exec: Remove LSM_UNSAFE_PTRACE_CAP With previous changes every location that tests for LSM_UNSAFE_PTRACE_CAP also tests for LSM_UNSAFE_PTRACE making the LSM_UNSAFE_PTRACE_CAP redundant, so remove it. Signed-off-by: "Eric W. Biederman" --- include/linux/security.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c2125e9093e8..9d9ee90f1f35 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -140,8 +140,7 @@ struct request_sock; /* bprm->unsafe reasons */ #define LSM_UNSAFE_SHARE 1 #define LSM_UNSAFE_PTRACE 2 -#define LSM_UNSAFE_PTRACE_CAP 4 -#define LSM_UNSAFE_NO_NEW_PRIVS 8 +#define LSM_UNSAFE_NO_NEW_PRIVS 4 #ifdef CONFIG_MMU extern int mmap_min_addr_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From 1331b62c97217459780e040e8a66bb609f2acd20 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jan 2017 16:01:57 +0100 Subject: rfkill: remove rfkill-regulator There are no users of this ("vrfkill") in the tree, so it's just dead code - remove it. This also isn't really how rfkill is supposed to be used - it's intended as a signalling mechanism to/from the device, which the driver (and partially cfg80211) will handle - having a separate rfkill instance for a regulator is confusing, the driver should use the regulator instead to turn off the device when requested. Signed-off-by: Johannes Berg --- include/linux/rfkill-regulator.h | 48 ---------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 include/linux/rfkill-regulator.h (limited to 'include/linux') diff --git a/include/linux/rfkill-regulator.h b/include/linux/rfkill-regulator.h deleted file mode 100644 index aca36bc83315..000000000000 --- a/include/linux/rfkill-regulator.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * rfkill-regulator.c - Regulator consumer driver for rfkill - * - * Copyright (C) 2009 Guiming Zhuo - * Copyright (C) 2011 Antonio Ospite - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#ifndef __LINUX_RFKILL_REGULATOR_H -#define __LINUX_RFKILL_REGULATOR_H - -/* - * Use "vrfkill" as supply id when declaring the regulator consumer: - * - * static struct regulator_consumer_supply pcap_regulator_V6_consumers [] = { - * { .dev_name = "rfkill-regulator.0", .supply = "vrfkill" }, - * }; - * - * If you have several regulator driven rfkill, you can append a numerical id to - * .dev_name as done above, and use the same id when declaring the platform - * device: - * - * static struct rfkill_regulator_platform_data ezx_rfkill_bt_data = { - * .name = "ezx-bluetooth", - * .type = RFKILL_TYPE_BLUETOOTH, - * }; - * - * static struct platform_device a910_rfkill = { - * .name = "rfkill-regulator", - * .id = 0, - * .dev = { - * .platform_data = &ezx_rfkill_bt_data, - * }, - * }; - */ - -#include - -struct rfkill_regulator_platform_data { - char *name; /* the name for the rfkill switch */ - enum rfkill_type type; /* the type as specified in rfkill.h */ -}; - -#endif /* __LINUX_RFKILL_REGULATOR_H */ -- cgit v1.2.3 From 12a6075cabc0d9ffbc0366b44daa22f278606312 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 10 Jan 2017 18:52:06 +0100 Subject: can: dev: add CAN interface termination API This patch adds a netlink interface to configure the CAN bus termination of CAN interfaces. Inside the driver an array of supported termination values is defined: const u16 drvname_termination[] = { 60, 120, CAN_TERMINATION_DISABLED }; struct drvname_priv *priv; priv = netdev_priv(dev); priv->termination_const = drvname_termination; priv->termination_const_cnt = ARRAY_SIZE(drvname_termination); priv->termination = CAN_TERMINATION_DISABLED; And the funtion to set the value has to be defined: priv->do_set_termination = drvname_set_termination; Signed-off-by: Oliver Hartkopp Reviewed-by: Ramesh Shanmugasundaram Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 5f5270941ba0..f6a57f322f00 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,9 @@ struct can_priv { struct can_bittiming bittiming, data_bittiming; const struct can_bittiming_const *bittiming_const, *data_bittiming_const; + const u16 *termination_const; + unsigned int termination_const_cnt; + u16 termination; struct can_clock clock; enum can_state state; @@ -53,6 +56,7 @@ struct can_priv { int (*do_set_bittiming)(struct net_device *dev); int (*do_set_data_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); + int (*do_set_termination)(struct net_device *dev, u16 term); int (*do_get_state)(const struct net_device *dev, enum can_state *state); int (*do_get_berr_counter)(const struct net_device *dev, -- cgit v1.2.3 From 431af779256cd6cb8328ac23c5696bae63c33a51 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Jan 2017 17:05:35 +0100 Subject: can: dev: add CAN interface API for fixed bitrates Some CAN interfaces only support fixed fixed bitrates. This patch adds a netlink interface to get the list of the CAN interface's fixed bitrates and data bitrates. Inside the driver arrays of supported data- bitrate values are defined. const u32 drvname_bitrate[] = { 20000, 50000, 100000 }; const u32 drvname_data_bitrate[] = { 200000, 500000, 1000000 }; struct drvname_priv *priv; priv = netdev_priv(dev); priv->bitrate_const = drvname_bitrate; priv->bitrate_const_cnt = ARRAY_SIZE(drvname_bitrate); priv->data_bitrate_const = drvname_data_bitrate; priv->data_bitrate_const_cnt = ARRAY_SIZE(drvname_data_bitrate); Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index f6a57f322f00..141b05aade81 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -41,6 +41,10 @@ struct can_priv { const u16 *termination_const; unsigned int termination_const_cnt; u16 termination; + const u32 *bitrate_const; + unsigned int bitrate_const_cnt; + const u32 *data_bitrate_const; + unsigned int data_bitrate_const_cnt; struct can_clock clock; enum can_state state; -- cgit v1.2.3 From 5299709d0a87342dadc1fc9850484fadeb488bf8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:01 -0800 Subject: treewide: Constify most dma_map_ops structures Most dma_map_ops structures are never modified. Constify these structures such that these can be write-protected. This patch has been generated as follows: git grep -l 'struct dma_map_ops' | xargs -d\\n sed -i \ -e 's/struct dma_map_ops/const struct dma_map_ops/g' \ -e 's/const struct dma_map_ops {/struct dma_map_ops {/g' \ -e 's/^const struct dma_map_ops;$/struct dma_map_ops;/' \ -e 's/const const struct dma_map_ops /const struct dma_map_ops /g'; sed -i -e 's/const \(struct dma_map_ops intel_dma_ops\)/\1/' \ $(git grep -l 'struct dma_map_ops intel_dma_ops'); sed -i -e 's/const \(struct dma_map_ops dma_iommu_ops\)/\1/' \ $(git grep -l 'struct dma_map_ops' | grep ^arch/powerpc); sed -i -e '/^struct vmd_dev {$/,/^};$/ s/const \(struct dma_map_ops[[:blank:]]dma_ops;\)/\1/' \ -e '/^static void vmd_setup_dma_ops/,/^}$/ s/const \(struct dma_map_ops \*dest\)/\1/' \ -e 's/const \(struct dma_map_ops \*dest = \&vmd->dma_ops\)/\1/' \ drivers/pci/host/*.c sed -i -e '/^void __init pci_iommu_alloc(void)$/,/^}$/ s/dma_ops->/intel_dma_ops./' arch/ia64/kernel/pci-dma.c sed -i -e 's/static const struct dma_map_ops sn_dma_ops/static struct dma_map_ops sn_dma_ops/' arch/ia64/sn/pci/pci_dma.c sed -i -e 's/(const struct dma_map_ops \*)//' drivers/misc/mic/bus/vop_bus.c Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- include/linux/dma-mapping.h | 42 +++++++++++++++++++++--------------------- include/linux/mic_bus.h | 2 +- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 10c5a17b1f51..f1da68b82c63 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -127,7 +127,7 @@ struct dma_map_ops { int is_phys; }; -extern struct dma_map_ops dma_noop_ops; +extern const struct dma_map_ops dma_noop_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) @@ -170,8 +170,8 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, * dma dependent code. Code that depends on the dma-mapping * API needs to set 'depends on HAS_DMA' in its Kconfig */ -extern struct dma_map_ops bad_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +extern const struct dma_map_ops bad_dma_ops; +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &bad_dma_ops; } @@ -182,7 +182,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; kmemcheck_mark_initialized(ptr, size); @@ -201,7 +201,7 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) @@ -217,7 +217,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); int i, ents; struct scatterlist *s; @@ -235,7 +235,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); @@ -249,7 +249,7 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; kmemcheck_mark_initialized(page_address(page) + offset, size); @@ -265,7 +265,7 @@ static inline void dma_unmap_page_attrs(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) @@ -279,7 +279,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); @@ -300,7 +300,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_resource) @@ -312,7 +312,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_cpu) @@ -324,7 +324,7 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_device) @@ -364,7 +364,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_cpu) @@ -376,7 +376,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_device) @@ -421,7 +421,7 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); if (ops->mmap) return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -439,7 +439,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); if (ops->get_sgtable) return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, @@ -457,7 +457,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); void *cpu_addr; BUG_ON(!ops); @@ -479,7 +479,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); WARN_ON(irqs_disabled()); @@ -537,7 +537,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #ifndef HAVE_ARCH_DMA_SUPPORTED static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); if (!ops) return 0; @@ -550,7 +550,7 @@ static inline int dma_supported(struct device *dev, u64 mask) #ifndef HAVE_ARCH_DMA_SET_MASK static inline int dma_set_mask(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); if (ops->set_dma_mask) return ops->set_dma_mask(dev, mask); diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h index 27d7c95fd0da..504d54c71bdb 100644 --- a/include/linux/mic_bus.h +++ b/include/linux/mic_bus.h @@ -90,7 +90,7 @@ struct mbus_hw_ops { }; struct mbus_device * -mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, +mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, struct mbus_hw_ops *hw_ops, int index, void __iomem *mmio_va); void mbus_unregister_device(struct mbus_device *mbdev); -- cgit v1.2.3 From 5657933dbb6e25feaf5d8df8c88f96cdade693a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:02 -0800 Subject: treewide: Move dma_ops from struct dev_archdata into struct device Some but not all architectures provide set_dma_ops(). Move dma_ops from struct dev_archdata into struct device such that it becomes possible on all architectures to configure dma_ops per device. Signed-off-by: Bart Van Assche Acked-by: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 491b4c0ca633..46a567261ccc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -921,6 +921,7 @@ struct device { #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ #endif + const struct dma_map_ops *dma_ops; u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as -- cgit v1.2.3 From ca6e8e1031419549f67291ca31b43126f07cecdb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:03 -0800 Subject: treewide: Consolidate set_dma_ops() implementations Now that all set_dma_ops() implementations are identical (ignoring BUG_ON() statements), remove the architecture specific definitions and add a definition in . Signed-off-by: Bart Van Assche Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: David Woodhouse Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: Paul Mackerras Cc: Russell King Signed-off-by: Doug Ledford --- include/linux/dma-mapping.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f1da68b82c63..e97f23e8b2d9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -164,6 +164,11 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, #ifdef CONFIG_HAS_DMA #include +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ + dev->dma_ops = dma_ops; +} #else /* * Define the dma api to allow compilation but not linking of -- cgit v1.2.3 From 815dd18788fe0d41899f51b91d0560279cf16b0d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:04 -0800 Subject: treewide: Consolidate get_dma_ops() implementations Introduce a new architecture-specific get_arch_dma_ops() function that takes a struct bus_type * argument. Add get_dma_ops() in . Signed-off-by: Bart Van Assche Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- include/linux/dma-mapping.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e97f23e8b2d9..ab8710888ddf 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -164,6 +164,13 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, #ifdef CONFIG_HAS_DMA #include +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->dma_ops) + return dev->dma_ops; + return get_arch_dma_ops(dev ? dev->bus : NULL); +} + static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { -- cgit v1.2.3 From 551199aca1c3102ebed390566d681cc1290284ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:07 -0800 Subject: lib/dma-virt: Add dma_virt_ops Several RDMA drivers (hfi1, qib and rxe) expect that ib_sge.addr is a virtual address. Provide DMA mapping operations that are suitable for these drivers. Signed-off-by: Bart Van Assche Cc: Christian Borntraeger Cc: Joerg Roedel Cc: Andy Lutomirski Cc: Michael S. Tsirkin Signed-off-by: Doug Ledford --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ab8710888ddf..426c43d4fdbf 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -128,6 +128,7 @@ struct dma_map_ops { }; extern const struct dma_map_ops dma_noop_ops; +extern const struct dma_map_ops dma_virt_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -- cgit v1.2.3 From 6db6f0eae6052b70885562e1733896647ec1d807 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 21 Jan 2017 21:01:32 +0100 Subject: bridge: multicast to unicast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements an optional, per bridge port flag and feature to deliver multicast packets to any host on the according port via unicast individually. This is done by copying the packet per host and changing the multicast destination MAC to a unicast one accordingly. multicast-to-unicast works on top of the multicast snooping feature of the bridge. Which means unicast copies are only delivered to hosts which are interested in it and signalized this via IGMP/MLD reports previously. This feature is intended for interface types which have a more reliable and/or efficient way to deliver unicast packets than broadcast ones (e.g. wifi). However, it should only be enabled on interfaces where no IGMPv2/MLDv1 report suppression takes place. This feature is disabled by default. The initial patch and idea is from Felix Fietkau. Signed-off-by: Felix Fietkau [linus.luessing@c0d3.blue: various bug + style fixes, commit message] Signed-off-by: Linus Lüssing Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index c6587c01d951..debc9d5904e5 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,7 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) #define BR_MCAST_FLOOD BIT(11) +#define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From 059aa734824165507c65fd30a55ff000afd14983 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 22 Jan 2017 14:04:29 -0500 Subject: nfs: Don't increment lock sequence ID after NFS4ERR_MOVED Xuan Qi reports that the Linux NFSv4 client failed to lock a file that was migrated. The steps he observed on the wire: 1. The client sent a LOCK request to the source server 2. The source server replied NFS4ERR_MOVED 3. The client switched to the destination server 4. The client sent the same LOCK request to the destination server with a bumped lock sequence ID 5. The destination server rejected the LOCK request with NFS4ERR_BAD_SEQID RFC 3530 section 8.1.5 provides a list of NFS errors which do not bump a lock sequence ID. However, RFC 3530 is now obsoleted by RFC 7530. In RFC 7530 section 9.1.7, this list has been updated by the addition of NFS4ERR_MOVED. Reported-by: Xuan Qi Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v3.7+ Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bca536341d1a..1b1ca04820a3 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -282,7 +282,7 @@ enum nfsstat4 { static inline bool seqid_mutating_err(u32 err) { - /* rfc 3530 section 8.1.5: */ + /* See RFC 7530, section 9.1.7 */ switch (err) { case NFS4ERR_STALE_CLIENTID: case NFS4ERR_STALE_STATEID: @@ -291,6 +291,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_BADXDR: case NFS4ERR_RESOURCE: case NFS4ERR_NOFILEHANDLE: + case NFS4ERR_MOVED: return false; }; return true; -- cgit v1.2.3 From b70f43a16182c7dbc0779bc5bd9f621711f13eb3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 22 Jan 2017 21:17:32 -0800 Subject: net: phy: Fix typo for MDIO module boilerplate comment The module boilerplate macro is named mdio_module_driver and not module_mdio_driver, fix that. Fixes: a9049e0c513c ("mdio: Add support for mdio drivers.") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index b6587a4b32e7..55a80d73cfc1 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -265,7 +265,7 @@ bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr); /** - * module_mdio_driver() - Helper macro for registering mdio drivers + * mdio_module_driver() - Helper macro for registering mdio drivers * * Helper macro for MDIO drivers which do not do anything special in module * init/exit. Each module may only use this macro once, and calling it -- cgit v1.2.3 From c9497c98901c689bf6c357f812bf864ed8f50ace Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 15 Dec 2016 14:02:53 +0200 Subject: net/mlx5: Add support for setting VF min rate Add support for SRIOV VF min rate guarantee by using the TSAR BW share weights mechanism. The TSAR BW share vport attribute represents the weight of that vport among the other vports weights which means that the actual vport BW percentage is the same vport weight percentage among the total vports weights sum. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d96ebc319d63..a919dfb920ae 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -547,7 +547,9 @@ struct mlx5_ifc_e_switch_cap_bits { struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; u8 esw_scheduling[0x1]; - u8 reserved_at_2[0x1e]; + u8 esw_bw_share[0x1]; + u8 esw_rate_limit[0x1]; + u8 reserved_at_4[0x1c]; u8 reserved_at_20[0x20]; -- cgit v1.2.3 From 8c7245a60ef8f8c4a427349690c5a141cfed6217 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 30 Nov 2016 20:23:51 +0200 Subject: net/mlx5: Push min-inline mode resolution helper into the core So we can use that from the IB driver too in downstream patches. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/vport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index ec35157ea725..656c70b65dd2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -51,6 +51,7 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 *min_inline); +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline); int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, -- cgit v1.2.3 From c929ea0b910355e1876c64431f3d5802f95b3d75 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 20 Jan 2017 16:48:39 +0800 Subject: SUNRPC: cleanup ida information when removing sunrpc module After removing sunrpc module, I get many kmemleak information as, unreferenced object 0xffff88003316b1e0 (size 544): comm "gssproxy", pid 2148, jiffies 4294794465 (age 4200.081s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4a/0xa0 [] kmem_cache_alloc+0x15e/0x1f0 [] ida_pre_get+0xaa/0x150 [] ida_simple_get+0xad/0x180 [] nlmsvc_lookup_host+0x4ab/0x7f0 [lockd] [] lockd+0x4d/0x270 [lockd] [] param_set_timeout+0x55/0x100 [lockd] [] svc_defer+0x114/0x3f0 [sunrpc] [] svc_defer+0x2d7/0x3f0 [sunrpc] [] rpc_show_info+0x8a/0x110 [sunrpc] [] proc_reg_write+0x7f/0xc0 [] __vfs_write+0xdf/0x3c0 [] vfs_write+0xef/0x240 [] SyS_write+0xad/0x130 [] entry_SYSCALL_64_fastpath+0x1a/0xa9 [] 0xffffffffffffffff I found, the ida information (dynamic memory) isn't cleanup. Signed-off-by: Kinglong Mee Fixes: 2f048db4680a ("SUNRPC: Add an identifier for struct rpc_clnt") Cc: stable@vger.kernel.org # v3.12+ Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 85cc819676e8..333ad11b3dd9 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -216,5 +216,6 @@ void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); +void rpc_cleanup_clids(void); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 8a1f780e7f28c7c1d640118242cf68d528c456cd Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 24 Jan 2017 15:17:45 -0800 Subject: memory_hotplug: make zone_can_shift() return a boolean value online_{kernel|movable} is used to change the memory zone to ZONE_{NORMAL|MOVABLE} and online the memory. To check that memory zone can be changed, zone_can_shift() is used. Currently the function returns minus integer value, plus integer value and 0. When the function returns minus or plus integer value, it means that the memory zone can be changed to ZONE_{NORNAL|MOVABLE}. But when the function returns 0, there are two meanings. One of the meanings is that the memory zone does not need to be changed. For example, when memory is in ZONE_NORMAL and onlined by online_kernel the memory zone does not need to be changed. Another meaning is that the memory zone cannot be changed. When memory is in ZONE_NORMAL and onlined by online_movable, the memory zone may not be changed to ZONE_MOVALBE due to memory online limitation(see Documentation/memory-hotplug.txt). In this case, memory must not be onlined. The patch changes the return type of zone_can_shift() so that memory online operation fails when memory zone cannot be changed as follows: Before applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 8388608 managed 8388608 online_movable operation succeeded. But memory is onlined as ZONE_NORMAL, not ZONE_MOVABLE. After applying patch: # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 # echo online_movable > memory4097/state bash: echo: write error: Invalid argument # grep -A 35 "Node 2" /proc/zoneinfo Node 2, zone Normal node_scanned 0 spanned 8388608 present 7864320 managed 7864320 online_movable operation failed because of failure of changing the memory zone from ZONE_NORMAL to ZONE_MOVABLE Fixes: df429ac03936 ("memory-hotplug: more general validation of zone during online") Link: http://lkml.kernel.org/r/2f9c3837-33d7-b6e5-59c0-6ca4372b2d84@gmail.com Signed-off-by: Yasuaki Ishimatsu Reviewed-by: Reza Arbab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01033fadea47..c1784c0b4f35 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -284,7 +284,7 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); -extern int zone_can_shift(unsigned long pfn, unsigned long nr_pages, - enum zone_type target); +extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages, + enum zone_type target, int *zone_shift); #endif /* __LINUX_MEMORY_HOTPLUG_H */ -- cgit v1.2.3 From b94f51183b0617e7b9b4fb4137d4cf1cab7547c2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 24 Jan 2017 15:17:53 -0800 Subject: kernel/watchdog: prevent false hardlockup on overloaded system On an overloaded system, it is possible that a change in the watchdog threshold can be delayed long enough to trigger a false positive. This can easily be achieved by having a cpu spinning indefinitely on a task, while another cpu updates watchdog threshold. What happens is while trying to park the watchdog threads, the hrtimers on the other cpus trigger and reprogram themselves with the new slower watchdog threshold. Meanwhile, the nmi watchdog is still programmed with the old faster threshold. Because the one cpu is blocked, it prevents the thread parking on the other cpus from completing, which is needed to shutdown the nmi watchdog and reprogram it correctly. As a result, a false positive from the nmi watchdog is reported. Fix this by setting a park_in_progress flag to block all lockups until the parking is complete. Fix provided by Ulrich Obergfell. [akpm@linux-foundation.org: s/park_in_progress/watchdog_park_in_progress/] Link: http://lkml.kernel.org/r/1481041033-192236-1-git-send-email-dzickus@redhat.com Signed-off-by: Don Zickus Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aacca824a6ae..0a3fadc32693 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -110,6 +110,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern unsigned long *watchdog_cpumask_bits; +extern atomic_t watchdog_park_in_progress; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; -- cgit v1.2.3 From ea57485af8f4221312a5a95d63c382b45e7840dc Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 24 Jan 2017 15:18:32 -0800 Subject: mm, page_alloc: fix check for NULL preferred_zone Patch series "fix premature OOM regression in 4.7+ due to cpuset races". This is v2 of my attempt to fix the recent report based on LTP cpuset stress test [1]. The intention is to go to stable 4.9 LTSS with this, as triggering repeated OOMs is not nice. That's why the patches try to be not too intrusive. Unfortunately why investigating I found that modifying the testcase to use per-VMA policies instead of per-task policies will bring the OOM's back, but that seems to be much older and harder to fix problem. I have posted a RFC [2] but I believe that fixing the recent regressions has a higher priority. Longer-term we might try to think how to fix the cpuset mess in a better and less error prone way. I was for example very surprised to learn, that cpuset updates change not only task->mems_allowed, but also nodemask of mempolicies. Until now I expected the parameter to alloc_pages_nodemask() to be stable. I wonder why do we then treat cpusets specially in get_page_from_freelist() and distinguish HARDWALL etc, when there's unconditional intersection between mempolicy and cpuset. I would expect the nodemask adjustment for saving overhead in g_p_f(), but that clearly doesn't happen in the current form. So we have both crazy complexity and overhead, AFAICS. [1] https://lkml.kernel.org/r/CAFpQJXUq-JuEP=QPidy4p_=FN0rkH5Z-kfB4qBvsf6jMS87Edg@mail.gmail.com [2] https://lkml.kernel.org/r/7c459f26-13a6-a817-e508-b65b903a8378@suse.cz This patch (of 4): Since commit c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") we have a wrong check for NULL preferred_zone, which can theoretically happen due to concurrent cpuset modification. We check the zoneref pointer which is never NULL and we should check the zone pointer. Also document this in first_zones_zonelist() comment per Michal Hocko. Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Link: http://lkml.kernel.org/r/20170120103843.24587-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Ganapatrao Kulkarni Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 36d9896fbc1e..f4aac87adcc3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -972,12 +972,16 @@ static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z, * @zonelist - The zonelist to search for a suitable zone * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter + * @return - Zoneref pointer for the first suitable zone found (see below) * * This function returns the first zone at or below a given zone index that is * within the allowed nodemask. The zoneref returned is a cursor that can be * used to iterate the zonelist with next_zones_zonelist by advancing it by * one before calling. + * + * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is + * never NULL). This may happen either genuinely, or due to concurrent nodemask + * update due to cpuset modification. */ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, enum zone_type highest_zoneidx, -- cgit v1.2.3 From 08d62f58aa2587132a930afbe8664379b430e2dd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Jan 2017 13:57:26 +0200 Subject: dmaengine: dw: register IRQ and DMA pool with instance ID It is really useful not only for debugging to have an IRQ line and DMA pool labeled with driver and its instance ID. Do this for DesignWare DMA driver. All current users of this IP would be enhanced later on. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/dma/dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h index ccfd0c3777df..b63b25814d77 100644 --- a/include/linux/dma/dw.h +++ b/include/linux/dma/dw.h @@ -23,6 +23,7 @@ struct dw_dma; /** * struct dw_dma_chip - representation of DesignWare DMA controller hardware * @dev: struct device of the DMA controller + * @id: instance ID * @irq: irq line * @regs: memory mapped I/O space * @clk: hclk clock @@ -31,6 +32,7 @@ struct dw_dma; */ struct dw_dma_chip { struct device *dev; + int id; int irq; void __iomem *regs; struct clk *clk; -- cgit v1.2.3 From 199244d69458770770890f8b5988a1b6cad701ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Jan 2017 13:57:31 +0200 Subject: dmaengine: dw: add support of iDMA 32-bit hardware iDMA 32-bit is Intel designed DMA controller that behaves like Synopsys Designware DMA. This patch adds a support of the new Intel hardware. Due to iDMA 32-bit has no autoconfiguration the platform code must provide a platform data to dw_dma_probe(). By default full FIFO (1024 bytes) is assigned to channel 0. Here we slice FIFO on equal parts between channels for iDMA 32-bit case. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index e69e415d0d98..896cb71a382c 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -41,6 +41,7 @@ struct dw_dma_slave { * @is_private: The device channels should be marked as private and not for * by the general purpose DMA channel allocator. * @is_memcpy: The device channels do support memory-to-memory transfers. + * @is_idma32: The type of the DMA controller is iDMA32 * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -53,6 +54,7 @@ struct dw_dma_platform_data { unsigned int nr_channels; bool is_private; bool is_memcpy; + bool is_idma32; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit v1.2.3 From 728bbe75c82fdc6bdf157652a4351856ed08afc4 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 Jan 2017 14:52:19 -0600 Subject: misc: sram: Introduce support code for protect-exec sram type Some platforms, like many ARM SoCs, require the ability to run code from on-chip memory like SRAM for tasks like reconfiguring the SDRAM controller or entering low-power sleep modes. In order to do this we must be able to allocate memory that the code can be copied to but then change the mapping to be read-only and executable so that no memory is both writable and executable at the same time to avoid opening any unneccesary security holes. By using the existing "pool" partition type that the SRAM driver allows we can create a memory space that will already be exposed by the genalloc framework to allow for allocating memory but we must extend this to meet the executable requirements. By making use of various set_memory_* APIs we can change the attributes of pages to make them writable for code upload but then read-only and executable when we want to actually run code. Because SRAM is a shared resource we need a centralized manager of these set memory calls. Because the SRAM driver itself is responsible for allocating the memory we can introduce a sram_copy_exec API for the driver that works like memcpy but also manages the page attributes and locking to allow multiple users of the same SRAM space to all copy their code over independent of other each before starting execution. It is maintained in a separate file from the core SRAM driver to allow it to be selectively built depending on whether or not a platform has the appropriate set_memory_* APIs. A future patch will integrate it with the core SRAM driver. Signed-off-by: Dave Gerlach Acked-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- include/linux/sram.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/linux/sram.h (limited to 'include/linux') diff --git a/include/linux/sram.h b/include/linux/sram.h new file mode 100644 index 000000000000..c97dcbe8ce25 --- /dev/null +++ b/include/linux/sram.h @@ -0,0 +1,27 @@ +/* + * Generic SRAM Driver Interface + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __LINUX_SRAM_H__ +#define __LINUX_SRAM_H__ + +struct gen_pool; + +#ifdef CONFIG_SRAM_EXEC +int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, size_t size); +#else +static inline int sram_exec_copy(struct gen_pool *pool, void *dst, void *src, + size_t size) +{ + return -ENODEV; +} +#endif /* CONFIG_SRAM_EXEC */ +#endif /* __LINUX_SRAM_H__ */ -- cgit v1.2.3 From bcd375f7f71f7106c97516bf5395149954ef8810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= Date: Fri, 13 Jan 2017 21:07:56 +0100 Subject: console: Add callback to flush scrollback buffer to consw struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new callback is in preparation for persistent scrollback buffer support for VGA consoles. With a single scrollback buffer for all consoles, we could flush the buffer just by invocating consw->con_switch(). But when each VGA console has its own scrollback buffer, we need a new callback to tell the video console driver which buffer to flush. Signed-off-by: Manuel Schölling Reviewed-by: Andrey Utkin Tested-by: Andrey Utkin Tested-by: Adam Borowski Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9c26c6685587..5949d1855589 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -72,6 +72,10 @@ struct consw { void (*con_invert_region)(struct vc_data *, u16 *, int); u16 *(*con_screen_pos)(struct vc_data *, int); unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *); + /* + * Flush the video console driver's scrollback buffer + */ + void (*con_flush_scrollback)(struct vc_data *); /* * Prepare the console for the debugger. This includes, but is not * limited to, unblanking the console, loading an appropriate -- cgit v1.2.3 From 93fbe91b552194af970256ce72934745d01df435 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 20 Jan 2017 10:15:07 +0100 Subject: iio: Add STM32 timer trigger driver Timers IPs can be used to generate triggers for other IPs like DAC or ADC. Each trigger may result of timer internals signals like counter enable, reset or edge, this configuration could be done through "master_mode" device attribute. Since triggers could be used by DAC or ADC their names are defined in include/ nux/iio/timer/stm32-timer-trigger.h and is_stm32_iio_timer_trigger function could be used to check if the trigger is valid or not. "trgo" trigger have a "sampling_frequency" attribute which allow to configure timer sampling frequency. version 8: - change kernel version from 4.10 to 4.11 in ABI documentation version 7: - remove all iio_device related code - move driver into trigger directory version 5: - simplify tables of triggers - only create an IIO device when needed version 4: - get triggers configuration from "reg" in DT - add tables of triggers - sampling frequency is enable/disable when writing in trigger sampling_frequency attribute - no more use of interruptions version 3: - change compatible to "st,stm32-timer-trigger" - fix attributes access right - use string instead of int for master_mode and slave_mode - document device attributes in sysfs-bus-iio-timer-stm32 version 2: - keep only one compatible - use st,input-triggers-names and st,output-triggers-names to know which triggers are accepted and/or create by the device Signed-off-by: Benjamin Gaignard Acked-by: Jonathan Cameron Signed-off-by: Lee Jones --- include/linux/iio/timer/stm32-timer-trigger.h | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/iio/timer/stm32-timer-trigger.h (limited to 'include/linux') diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h new file mode 100644 index 000000000000..55535aef2e6c --- /dev/null +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) STMicroelectronics 2016 + * + * Author: Benjamin Gaignard + * + * License terms: GNU General Public License (GPL), version 2 + */ + +#ifndef _STM32_TIMER_TRIGGER_H_ +#define _STM32_TIMER_TRIGGER_H_ + +#define TIM1_TRGO "tim1_trgo" +#define TIM1_CH1 "tim1_ch1" +#define TIM1_CH2 "tim1_ch2" +#define TIM1_CH3 "tim1_ch3" +#define TIM1_CH4 "tim1_ch4" + +#define TIM2_TRGO "tim2_trgo" +#define TIM2_CH1 "tim2_ch1" +#define TIM2_CH2 "tim2_ch2" +#define TIM2_CH3 "tim2_ch3" +#define TIM2_CH4 "tim2_ch4" + +#define TIM3_TRGO "tim3_trgo" +#define TIM3_CH1 "tim3_ch1" +#define TIM3_CH2 "tim3_ch2" +#define TIM3_CH3 "tim3_ch3" +#define TIM3_CH4 "tim3_ch4" + +#define TIM4_TRGO "tim4_trgo" +#define TIM4_CH1 "tim4_ch1" +#define TIM4_CH2 "tim4_ch2" +#define TIM4_CH3 "tim4_ch3" +#define TIM4_CH4 "tim4_ch4" + +#define TIM5_TRGO "tim5_trgo" +#define TIM5_CH1 "tim5_ch1" +#define TIM5_CH2 "tim5_ch2" +#define TIM5_CH3 "tim5_ch3" +#define TIM5_CH4 "tim5_ch4" + +#define TIM6_TRGO "tim6_trgo" + +#define TIM7_TRGO "tim7_trgo" + +#define TIM8_TRGO "tim8_trgo" +#define TIM8_CH1 "tim8_ch1" +#define TIM8_CH2 "tim8_ch2" +#define TIM8_CH3 "tim8_ch3" +#define TIM8_CH4 "tim8_ch4" + +#define TIM9_TRGO "tim9_trgo" +#define TIM9_CH1 "tim9_ch1" +#define TIM9_CH2 "tim9_ch2" + +#define TIM12_TRGO "tim12_trgo" +#define TIM12_CH1 "tim12_ch1" +#define TIM12_CH2 "tim12_ch2" + +bool is_stm32_timer_trigger(struct iio_trigger *trig); + +#endif -- cgit v1.2.3 From 2acae0d5b0f73a8fb4b180bd13491feb96e55fc6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 Jan 2017 02:28:16 +0100 Subject: trace: add variant without spacing in trace_print_hex_seq For upcoming tracepoint support for BPF, we want to dump the program's tag. Format should be similar to __print_hex(), but without spacing. Add a __print_hex_str() variant for exactly that purpose that reuses trace_print_hex_seq(). Signed-off-by: Daniel Borkmann Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/trace_events.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index be007610ceb0..cfa475a0e9ca 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -33,7 +33,8 @@ const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, unsigned int bitmask_size); const char *trace_print_hex_seq(struct trace_seq *p, - const unsigned char *buf, int len); + const unsigned char *buf, int len, + bool spacing); const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, -- cgit v1.2.3 From a67edbf4fb6deadcfe57a04a134abed4a5ba3bb5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 Jan 2017 02:28:18 +0100 Subject: bpf: add initial bpf tracepoints This work adds a number of tracepoints to paths that are either considered slow-path or exception-like states, where monitoring or inspecting them would be desirable. For bpf(2) syscall, tracepoints have been placed for main commands when they succeed. In XDP case, tracepoint is for exceptions, that is, f.e. on abnormal BPF program exit such as unknown or XDP_ABORTED return code, or when error occurs during XDP_TX action and the packet could not be forwarded. Both have been split into separate event headers, and can be further extended. Worst case, if they unexpectedly should get into our way in future, they can also removed [1]. Of course, these tracepoints (like any other) can be analyzed by eBPF itself, etc. Example output: # ./perf record -a -e bpf:* sleep 10 # ./perf script sock_example 6197 [005] 283.980322: bpf:bpf_map_create: map type=ARRAY ufd=4 key=4 val=8 max=256 flags=0 sock_example 6197 [005] 283.980721: bpf:bpf_prog_load: prog=a5ea8fa30ea6849c type=SOCKET_FILTER ufd=5 sock_example 6197 [005] 283.988423: bpf:bpf_prog_get_type: prog=a5ea8fa30ea6849c type=SOCKET_FILTER sock_example 6197 [005] 283.988443: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[06 00 00 00] val=[00 00 00 00 00 00 00 00] [...] sock_example 6197 [005] 288.990868: bpf:bpf_map_lookup_elem: map type=ARRAY ufd=4 key=[01 00 00 00] val=[14 00 00 00 00 00 00 00] swapper 0 [005] 289.338243: bpf:bpf_prog_put_rcu: prog=a5ea8fa30ea6849c type=SOCKET_FILTER [1] https://lwn.net/Articles/705270/ Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_trace.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/bpf_trace.h (limited to 'include/linux') diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h new file mode 100644 index 000000000000..b22efbdd2eb4 --- /dev/null +++ b/include/linux/bpf_trace.h @@ -0,0 +1,7 @@ +#ifndef __LINUX_BPF_TRACE_H__ +#define __LINUX_BPF_TRACE_H__ + +#include +#include + +#endif /* __LINUX_BPF_TRACE_H__ */ -- cgit v1.2.3 From 19f6d3f3c8422d65b5e3d2162e30ef07c6e21ea2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 23 Jan 2017 10:59:22 -0800 Subject: net/tcp-fastopen: Add new API support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new socket option, TCP_FASTOPEN_CONNECT, as an alternative way to perform Fast Open on the active side (client). Prior to this patch, a client needs to replace the connect() call with sendto(MSG_FASTOPEN). This can be cumbersome for applications who want to use Fast Open: these socket operations are often done in lower layer libraries used by many other applications. Changing these libraries and/or the socket call sequences are not trivial. A more convenient approach is to perform Fast Open by simply enabling a socket option when the socket is created w/o changing other socket calls sequence: s = socket() create a new socket setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN_CONNECT …); newly introduced sockopt If set, new functionality described below will be used. Return ENOTSUPP if TFO is not supported or not enabled in the kernel. connect() With cookie present, return 0 immediately. With no cookie, initiate 3WHS with TFO cookie-request option and return -1 with errno = EINPROGRESS. write()/sendmsg() With cookie present, send out SYN with data and return the number of bytes buffered. With no cookie, and 3WHS not yet completed, return -1 with errno = EINPROGRESS. No MSG_FASTOPEN flag is needed. read() Return -1 with errno = EWOULDBLOCK/EAGAIN if connect() is called but write() is not called yet. Return -1 with errno = EWOULDBLOCK/EAGAIN if connection is established but no msg is received yet. Return number of bytes read if socket is established and there is msg received. The new API simplifies life for applications that always perform a write() immediately after a successful connect(). Such applications can now take advantage of Fast Open by merely making one new setsockopt() call at the time of creating the socket. Nothing else about the application's socket call sequence needs to change. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5371b3d70cfe..f88f4649ba6f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -222,7 +222,8 @@ struct tcp_sock { u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:5; + fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ + unused:4; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, -- cgit v1.2.3 From d6f8cfa3dea294eabf8f302e90176dd6381fb66e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Jan 2017 11:39:49 +0100 Subject: net: phy: leds: Break dependency of phy.h on phy_led_triggers.h includes , which is not really needed. Drop the include from , and add it to all users that didn't include it explicitly. Suggested-by: Andrew Lunn Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index f7d95f644eed..7fc1105605bf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,7 +25,6 @@ #include #include #include -#include #include -- cgit v1.2.3 From 3c880eb0205222bb062970085ebedc73ec8dfd14 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Jan 2017 11:39:50 +0100 Subject: net: phy: leds: Fix truncated LED trigger names Commit 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id") increased the size of MII bus IDs, but forgot to update the private definition in . This may cause: 1. Truncation of LED trigger names, 2. Duplicate LED trigger names, 3. Failures registering LED triggers, 4. Crashes due to bad error handling in the LED trigger failure path. To fix this, and prevent the definitions going out of sync again in the future, let the PHY LED trigger code use the existing MII_BUS_ID_SIZE definition. Example: - Before I had triggers "ee700000.etherne:01:100Mbps" and "ee700000.etherne:01:10Mbps", - After the increase of MII_BUS_ID_SIZE, both became "ee700000.ethernet-ffffffff:01:" => FAIL, - Now, the triggers are "ee700000.ethernet-ffffffff:01:100Mbps" and "ee700000.ethernet-ffffffff:01:10Mbps", which are unique again. Fixes: 4567d686f5c6d955 ("phy: increase size of MII_BUS_ID_SIZE and bus_id") Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy_led_triggers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h index a2daea0a37d2..b37b05bfd1a6 100644 --- a/include/linux/phy_led_triggers.h +++ b/include/linux/phy_led_triggers.h @@ -18,11 +18,11 @@ struct phy_device; #ifdef CONFIG_LED_TRIGGER_PHY #include +#include #define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 -#define PHY_MII_BUS_ID_SIZE (20 - 3) -#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \ FIELD_SIZEOF(struct mdio_device, addr)+\ PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) -- cgit v1.2.3 From f2c4689640e9a34bc45c013032185ed4ce47e7ff Mon Sep 17 00:00:00 2001 From: Lance Roy Date: Mon, 23 Jan 2017 13:35:18 -0800 Subject: srcu: Implement more-efficient reader counts SRCU uses two per-cpu counters: a nesting counter to count the number of active critical sections, and a sequence counter to ensure that the nesting counters don't change while they are being added together in srcu_readers_active_idx_check(). This patch instead uses per-cpu lock and unlock counters. Because both counters only increase and srcu_readers_active_idx_check() reads the unlock counter before the lock counter, this achieves the same end without having to increment two different counters in srcu_read_lock(). This also saves a smp_mb() in srcu_readers_active_idx_check(). Possible bug: There is no guarantee that the lock counter won't overflow during srcu_readers_active_idx_check(), as there are no memory barriers around srcu_flip() (see comment in srcu_readers_active_idx_check() for details). However, this problem was already present before this patch. Suggested-by: Mathieu Desnoyers Signed-off-by: Lance Roy Cc: Paul E. McKenney Cc: Lai Jiangshan Cc: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index dc8eb63c6568..a598cf3ac70c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -33,9 +33,9 @@ #include #include -struct srcu_struct_array { - unsigned long c[2]; - unsigned long seq[2]; +struct srcu_array { + unsigned long lock_count[2]; + unsigned long unlock_count[2]; }; struct rcu_batch { @@ -46,7 +46,7 @@ struct rcu_batch { struct srcu_struct { unsigned long completed; - struct srcu_struct_array __percpu *per_cpu_ref; + struct srcu_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ bool running; /* callbacks just queued */ @@ -118,7 +118,7 @@ void process_srcu(struct work_struct *work); * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ + static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -- cgit v1.2.3 From d85b62f18d543c663cbdd6061054efeb9e66cee7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Nov 2016 12:08:49 -0800 Subject: srcu: Force full grace-period ordering If a process invokes synchronize_srcu(), is delayed just the right amount of time, and thus does not sleep when waiting for the grace period to complete, there is no ordering between the end of the grace period and the code following the synchronize_srcu(). Similarly, there can be a lack of ordering between the end of the SRCU grace period and callback invocation. This commit adds the necessary ordering. Reported-by: Lance Roy Signed-off-by: Paul E. McKenney [ paulmck: Further smp_mb() adjustment per email with Lance Roy. ] --- include/linux/rcupdate.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 01f71e1d2e94..6ade6a52d9d4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1161,5 +1161,17 @@ do { \ ftrace_dump(oops_dump_mode); \ } while (0) +/* + * Place this after a lock-acquisition primitive to guarantee that + * an UNLOCK+LOCK pair acts as a full barrier. This guarantee applies + * if the UNLOCK and LOCK are executed by the same CPU or if the + * UNLOCK and LOCK operate on the same lock variable. + */ +#ifdef CONFIG_PPC +#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ +#else /* #ifdef CONFIG_PPC */ +#define smp_mb__after_unlock_lock() do { } while (0) +#endif /* #else #ifdef CONFIG_PPC */ + #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3 From 85b4685da52f6680635370c2cfb9a42f04ac9652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 25 Jan 2017 21:00:25 +0100 Subject: net: phy: broadcom: use auxctl reading helper in BCM54612E code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with commit 5b4e29005123 ("net: phy: broadcom: add bcm54xx_auxctl_read") we have a reading helper so use it and avoid code duplication. It also means we don't need MII_BCM54XX_AUXCTL_SHDWSEL_MISC define as it's the same as MII_BCM54XX_AUXCTL_SHDWSEL_MISC just for reading needs (same value shifted by 12 bits). Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 295fb3e73de5..34e61004b9dc 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -111,7 +111,6 @@ #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 #define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -#define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) -- cgit v1.2.3 From 8293c7bcdef1b866610bf21dee3eb0d181cee753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 25 Jan 2017 21:00:26 +0100 Subject: net: phy: broadcom: drop duplicated define for RGMII SKEW delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We had two defines for the same bit (both were used with the MII_BCM54XX_AUXCTL_SHDWSEL_MISC register). Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 34e61004b9dc..f9cb73df127e 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -109,7 +109,6 @@ #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 -#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 -- cgit v1.2.3 From 5e7bfa6cb0a94c673f6d565e58353366d88e2aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 25 Jan 2017 21:00:27 +0100 Subject: net: phy: bcm-phy-lib: clean up remaining AUXCTL register defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) Use 0x%02x format for register number. This follows some other defines and makes it easier to distinct register from values. 2) Put register define above values and sort the values. It makes reading header code easier. 3) Use 0x%04x format for all values. It's about consistency with other values (and most of the header) not a personal preference. 4) Separate define for reading shift value with an extre empty line. It's user for all AUXCTL registers in a bcm54xx_auxctl_read. Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f9cb73df127e..cf93f1399d3e 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -104,17 +104,17 @@ /* * AUXILIARY CONTROL SHADOW ACCESS REGISTERS. (PHY REG 0x18) */ -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x00 #define MII_BCM54XX_AUXCTL_ACTL_TX_6DB 0x0400 #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 -#define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 -#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 -#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 -#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 -#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) -#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN (1 << 4) +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 +#define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 +#define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 /* -- cgit v1.2.3 From a264d10ff45c688293d9112fddd8d29c819e0853 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 9 Jan 2017 16:02:28 +0200 Subject: gpiolib: Convert fwnode_get_named_gpiod() to configure GPIO Make fwnode_get_named_gpiod() consistent with the rest of gpiod_get() like API, i.e. configure GPIO pin immediately after request. Besides obvious clean up it will help to configure pins based on firmware provided resources. Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index fb0fde686cb1..930d10049d8d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -135,10 +135,12 @@ int desc_to_gpio(const struct gpio_desc *desc); struct fwnode_handle; struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname); + const char *propname, + enum gpiod_flags dflags); struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, const char *con_id, - struct fwnode_handle *child); + struct fwnode_handle *child, + enum gpiod_flags flags); #else /* CONFIG_GPIOLIB */ static inline int gpiod_count(struct device *dev, const char *con_id) @@ -411,14 +413,19 @@ static inline int desc_to_gpio(const struct gpio_desc *desc) /* Child properties interface */ struct fwnode_handle; -static inline struct gpio_desc *fwnode_get_named_gpiod( - struct fwnode_handle *fwnode, const char *propname) +static inline +struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, + const char *propname, + enum gpiod_flags dflags) { return ERR_PTR(-ENOSYS); } -static inline struct gpio_desc *devm_get_gpiod_from_child( - struct device *dev, const char *con_id, struct fwnode_handle *child) +static inline +struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, + const char *con_id, + struct fwnode_handle *child, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3 From b2987d7438e0ca949d81774ca8b43d370a1f9947 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 12 Jan 2017 17:39:24 +0100 Subject: gpio: Pass GPIO label down to gpiod_request Currently all users of fwnode_get_named_gpiod() have no way to specify a label for the GPIO. So GPIOs listed in debugfs are shown with label "?". With this change a proper label is used. Also adjust all users so they can pass a label, properly retrieved from device tree properties. Signed-off-by: Alexander Stein Cc: Andy Shevchenko Acked-by: Jacek Anaszewski Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Dmitry Torokhov Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 930d10049d8d..80bad7ebde04 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -136,11 +136,13 @@ struct fwnode_handle; struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname, - enum gpiod_flags dflags); + enum gpiod_flags dflags, + const char *label); struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, const char *con_id, struct fwnode_handle *child, - enum gpiod_flags flags); + enum gpiod_flags flags, + const char *label); #else /* CONFIG_GPIOLIB */ static inline int gpiod_count(struct device *dev, const char *con_id) @@ -416,7 +418,8 @@ struct fwnode_handle; static inline struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname, - enum gpiod_flags dflags) + enum gpiod_flags dflags, + const char *label) { return ERR_PTR(-ENOSYS); } @@ -425,7 +428,8 @@ static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, const char *con_id, struct fwnode_handle *child, - enum gpiod_flags flags) + enum gpiod_flags flags, + const char *label) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3 From 53d333ac93911dab22d12a78b0a6414f9afb0117 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Jan 2017 21:49:12 +0530 Subject: gpio: davinci: Remove unwanted blank line Remove redundant blank line. Signed-off-by: Keerthy Reviewed-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-davinci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index 44ca5303849c..18127c4aa4ba 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -26,7 +26,6 @@ struct davinci_gpio_platform_data { u32 gpio_unbanked; }; - struct davinci_gpio_controller { struct gpio_chip chip; struct irq_domain *irq_domain; -- cgit v1.2.3 From b5cf3fd827d2e11355c126b44ea625650ebf4d39 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Fri, 13 Jan 2017 09:50:12 +0530 Subject: gpio: davinci: Redesign driver to accommodate ngpios in one gpio chip The Davinci GPIO driver is implemented to work with one monolithic Davinci GPIO platform device which may have up to Y(144) gpios. The Davinci GPIO driver instantiates number of GPIO chips with max 32 gpio pins per each during initialization and one IRQ domain. So, the current GPIO's opjects structure is: Davinci GPIO controller |- ------| ... |--- irq_domain (hwirq [0..143]) |- ------| Current driver creates one chip for every 32 GPIOs in a controller. This was a limitation earlier now there is no need for that. Hence redesigning the driver to create one gpio chip for all the ngpio in the controller. |- ------|--- irq_domain (hwirq [0..143]). The previous discussion on this can be found here: https://www.spinics.net/lists/linux-omap/msg132869.html Signed-off-by: Keerthy Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-davinci.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index 18127c4aa4ba..c62a9438976d 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -21,19 +21,27 @@ #include +#define MAX_REGS_BANKS 5 + struct davinci_gpio_platform_data { u32 ngpio; u32 gpio_unbanked; }; +struct davinci_gpio_irq_data { + void __iomem *regs; + struct davinci_gpio_controller *chip; + int bank_num; +}; + struct davinci_gpio_controller { struct gpio_chip chip; struct irq_domain *irq_domain; /* Serialize access to GPIO registers */ spinlock_t lock; - void __iomem *regs; + void __iomem *regs[MAX_REGS_BANKS]; int gpio_unbanked; - unsigned gpio_irq; + unsigned int base_irq; }; /* -- cgit v1.2.3 From 8e11047b8f3cc0dc6df956cf01915077a574168e Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Jan 2017 21:49:14 +0530 Subject: gpio: davinci: Add support for multiple GPIO controllers Update GPIO driver to support Multiple GPIO controllers by updating the base of subsequent GPIO chips with total of previous chips gpio count so that gpio_add_chip gets unique numbers. Signed-off-by: Keerthy Reviewed-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/platform_data/gpio-davinci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h index c62a9438976d..90ae19ca828f 100644 --- a/include/linux/platform_data/gpio-davinci.h +++ b/include/linux/platform_data/gpio-davinci.h @@ -42,6 +42,7 @@ struct davinci_gpio_controller { void __iomem *regs[MAX_REGS_BANKS]; int gpio_unbanked; unsigned int base_irq; + unsigned int base; }; /* -- cgit v1.2.3 From 58957d2edfa19e9b8f80385ba042495058e5e60e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 23 Jan 2017 15:34:32 +0300 Subject: pinctrl: Widen the generic pinconf argument from 16 to 24 bits The current pinconf packed format allows only 16-bit argument limiting the maximum value 65535. For most types this is enough. However, debounce time can be in range of hundreths of milliseconds in case of mechanical switches so we cannot represent the worst case using the current format. In order to support larger values change the packed format so that the lower 8 bits are used as type which leaves 24 bits for the argument. This allows representing values up to 16777215 and debounce times up to 16 seconds. We also convert the existing users to use 32-bit integer when extracting argument from the packed configuration value. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 12343caa114e..9a09107c890e 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -92,6 +92,8 @@ * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if * you need to pass in custom configurations to the pin controller, use * PIN_CONFIG_END+1 as the base offset. + * @PIN_CONFIG_MAX: this is the maximum configuration value that can be + * presented using the packed format. */ enum pin_config_param { PIN_CONFIG_BIAS_BUS_HOLD, @@ -112,7 +114,8 @@ enum pin_config_param { PIN_CONFIG_OUTPUT, PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SLEW_RATE, - PIN_CONFIG_END = 0x7FFF, + PIN_CONFIG_END = 0x7F, + PIN_CONFIG_MAX = 0xFF, }; #ifdef CONFIG_DEBUG_FS @@ -130,27 +133,27 @@ struct pin_config_item { /* * Helpful configuration macro to be used in tables etc. */ -#define PIN_CONF_PACKED(p, a) ((a << 16) | ((unsigned long) p & 0xffffUL)) +#define PIN_CONF_PACKED(p, a) ((a << 8) | ((unsigned long) p & 0xffUL)) /* * The following inlines stuffs a configuration parameter and data value * into and out of an unsigned long argument, as used by the generic pin config - * system. We put the parameter in the lower 16 bits and the argument in the - * upper 16 bits. + * system. We put the parameter in the lower 8 bits and the argument in the + * upper 24 bits. */ static inline enum pin_config_param pinconf_to_config_param(unsigned long config) { - return (enum pin_config_param) (config & 0xffffUL); + return (enum pin_config_param) (config & 0xffUL); } -static inline u16 pinconf_to_config_argument(unsigned long config) +static inline u32 pinconf_to_config_argument(unsigned long config) { - return (enum pin_config_param) ((config >> 16) & 0xffffUL); + return (u32) ((config >> 8) & 0xffffffUL); } static inline unsigned long pinconf_to_config_packed(enum pin_config_param param, - u16 argument) + u32 argument) { return PIN_CONF_PACKED(param, argument); } -- cgit v1.2.3 From 15381bc7c7f52d56f87c56dd7c948ad78704b852 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 23 Jan 2017 15:34:33 +0300 Subject: pinctrl: Allow configuration of pins from gpiolib based drivers When a GPIO driver is backed by a pinctrl driver the GPIO driver sometimes needs to call the pinctrl driver to configure certain things, like whether the pin is used as input or output. In addition to this there are other configurations applicable to GPIOs such as setting debounce time of the GPIO. To support this we introduce a new function pinctrl_gpio_set_config() that can be used by gpiolib based driver to pass configuration requests to the backing pinctrl driver. Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/pinctrl/consumer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index d7e5d608faa7..a0f2aba72fa9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -29,6 +29,7 @@ extern int pinctrl_request_gpio(unsigned gpio); extern void pinctrl_free_gpio(unsigned gpio); extern int pinctrl_gpio_direction_input(unsigned gpio); extern int pinctrl_gpio_direction_output(unsigned gpio); +extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); extern struct pinctrl * __must_check pinctrl_get(struct device *dev); extern void pinctrl_put(struct pinctrl *p); @@ -80,6 +81,11 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio) return 0; } +static inline int pinctrl_gpio_set_config(unsigned gpio, unsigned long config) +{ + return 0; +} + static inline struct pinctrl * __must_check pinctrl_get(struct device *dev) { return NULL; -- cgit v1.2.3 From 2956b5d94a76b596fa5057c2b3ca915cb27d7652 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 23 Jan 2017 15:34:34 +0300 Subject: pinctrl / gpio: Introduce .set_config() callback for GPIO chips Currently we already have two pin configuration related callbacks available for GPIO chips .set_single_ended() and .set_debounce(). In future we expect to have even more, which does not scale well if we need to add yet another callback to the GPIO chip structure for each possible configuration parameter. Better solution is to reuse what we already have available in the generic pinconf. To support this, we introduce a new .set_config() callback for GPIO chips. The callback takes a single packed pin configuration value as parameter. This can then be extended easily beyond what is currently supported by just adding new types to the generic pinconf enum. If the GPIO driver is backed up by a pinctrl driver the GPIO driver can just assign gpiochip_generic_config() (introduced in this patch) to .set_config and that will take care configuration requests are directed to the pinctrl driver. We then convert the existing drivers over .set_config() and finally remove the .set_single_ended() and .set_debounce() callbacks. Suggested-by: Linus Walleij Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 37 +++++++-------------------------- include/linux/pinctrl/pinconf-generic.h | 33 +++++++++++++---------------- 2 files changed, 23 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c2748accea71..db2022910caf 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -8,6 +8,7 @@ #include #include #include +#include struct gpio_desc; struct of_phandle_args; @@ -18,18 +19,6 @@ struct module; #ifdef CONFIG_GPIOLIB -/** - * enum single_ended_mode - mode for single ended operation - * @LINE_MODE_PUSH_PULL: normal mode for a GPIO line, drive actively high/low - * @LINE_MODE_OPEN_DRAIN: set line to be open drain - * @LINE_MODE_OPEN_SOURCE: set line to be open source - */ -enum single_ended_mode { - LINE_MODE_PUSH_PULL, - LINE_MODE_OPEN_DRAIN, - LINE_MODE_OPEN_SOURCE, -}; - /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part @@ -48,16 +37,8 @@ enum single_ended_mode { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" - * @set_debounce: optional hook for setting debounce time for specified gpio in - * interrupt triggered gpio chips - * @set_single_ended: optional hook for setting a line as open drain, open - * source, or non-single ended (restore from open drain/source to normal - * push-pull mode) this should be implemented if the hardware supports - * open drain or open source settings. The GPIOlib will otherwise try - * to emulate open drain/source by not actively driving lines high/low - * if a consumer request this. The driver may return -ENOTSUPP if e.g. - * it supports just open drain but not open source and is called - * with LINE_MODE_OPEN_SOURCE as mode argument. + * @set_config: optional hook for all kinds of settings. Uses the same + * packed config format as generic pinconf. * @to_irq: optional hook supporting non-static gpio_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code @@ -150,13 +131,9 @@ struct gpio_chip { void (*set_multiple)(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits); - int (*set_debounce)(struct gpio_chip *chip, - unsigned offset, - unsigned debounce); - int (*set_single_ended)(struct gpio_chip *chip, - unsigned offset, - enum single_ended_mode mode); - + int (*set_config)(struct gpio_chip *chip, + unsigned offset, + unsigned long config); int (*to_irq)(struct gpio_chip *chip, unsigned offset); @@ -310,6 +287,8 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); +int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset, + unsigned long config); #ifdef CONFIG_PINCTRL diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 9a09107c890e..7620eb127cff 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -12,12 +12,6 @@ #ifndef __LINUX_PINCTRL_PINCONF_GENERIC_H #define __LINUX_PINCTRL_PINCONF_GENERIC_H -/* - * You shouldn't even be able to compile with these enums etc unless you're - * using generic pin config. That is why this is defined out. - */ -#ifdef CONFIG_GENERIC_PINCONF - /** * enum pin_config_param - possible pin configuration parameters * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it @@ -118,18 +112,6 @@ enum pin_config_param { PIN_CONFIG_MAX = 0xFF, }; -#ifdef CONFIG_DEBUG_FS -#define PCONFDUMP(a, b, c, d) { .param = a, .display = b, .format = c, \ - .has_arg = d } - -struct pin_config_item { - const enum pin_config_param param; - const char * const display; - const char * const format; - bool has_arg; -}; -#endif /* CONFIG_DEBUG_FS */ - /* * Helpful configuration macro to be used in tables etc. */ @@ -158,6 +140,21 @@ static inline unsigned long pinconf_to_config_packed(enum pin_config_param param return PIN_CONF_PACKED(param, argument); } +#ifdef CONFIG_GENERIC_PINCONF + +#ifdef CONFIG_DEBUG_FS +#define PCONFDUMP(a, b, c, d) { \ + .param = a, .display = b, .format = c, .has_arg = d \ + } + +struct pin_config_item { + const enum pin_config_param param; + const char * const display; + const char * const format; + bool has_arg; +}; +#endif /* CONFIG_DEBUG_FS */ + #ifdef CONFIG_OF #include -- cgit v1.2.3 From 228c8c6b1f4376788e9d5ab00d50b10228eb40d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Jan 2017 17:15:44 +0100 Subject: wireless: define cipher/AKM suites using a macro The spec writes cipher/AKM suites as something like 00-0F-AC:9, but the part after the colon isn't hex, it's decimal, so that we've already had a few mistakes (in other code, or unmerged patches) to e.g. write 0x000FAC10 instead of 0x000FAC0A. Use a macro to avoid that problem. Reviewed-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 87d1937e4671..02768de209d6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2324,31 +2324,33 @@ enum ieee80211_sa_query_action { }; +#define SUITE(oui, id) (((oui) << 8) | (id)) + /* cipher suite selectors */ -#define WLAN_CIPHER_SUITE_USE_GROUP 0x000FAC00 -#define WLAN_CIPHER_SUITE_WEP40 0x000FAC01 -#define WLAN_CIPHER_SUITE_TKIP 0x000FAC02 -/* reserved: 0x000FAC03 */ -#define WLAN_CIPHER_SUITE_CCMP 0x000FAC04 -#define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 -#define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 -#define WLAN_CIPHER_SUITE_GCMP 0x000FAC08 -#define WLAN_CIPHER_SUITE_GCMP_256 0x000FAC09 -#define WLAN_CIPHER_SUITE_CCMP_256 0x000FAC0A -#define WLAN_CIPHER_SUITE_BIP_GMAC_128 0x000FAC0B -#define WLAN_CIPHER_SUITE_BIP_GMAC_256 0x000FAC0C -#define WLAN_CIPHER_SUITE_BIP_CMAC_256 0x000FAC0D - -#define WLAN_CIPHER_SUITE_SMS4 0x00147201 +#define WLAN_CIPHER_SUITE_USE_GROUP SUITE(0x000FAC, 0) +#define WLAN_CIPHER_SUITE_WEP40 SUITE(0x000FAC, 1) +#define WLAN_CIPHER_SUITE_TKIP SUITE(0x000FAC, 2) +/* reserved: SUITE(0x000FAC, 3) */ +#define WLAN_CIPHER_SUITE_CCMP SUITE(0x000FAC, 4) +#define WLAN_CIPHER_SUITE_WEP104 SUITE(0x000FAC, 5) +#define WLAN_CIPHER_SUITE_AES_CMAC SUITE(0x000FAC, 6) +#define WLAN_CIPHER_SUITE_GCMP SUITE(0x000FAC, 8) +#define WLAN_CIPHER_SUITE_GCMP_256 SUITE(0x000FAC, 9) +#define WLAN_CIPHER_SUITE_CCMP_256 SUITE(0x000FAC, 10) +#define WLAN_CIPHER_SUITE_BIP_GMAC_128 SUITE(0x000FAC, 11) +#define WLAN_CIPHER_SUITE_BIP_GMAC_256 SUITE(0x000FAC, 12) +#define WLAN_CIPHER_SUITE_BIP_CMAC_256 SUITE(0x000FAC, 13) + +#define WLAN_CIPHER_SUITE_SMS4 SUITE(0x001472, 1) /* AKM suite selectors */ -#define WLAN_AKM_SUITE_8021X 0x000FAC01 -#define WLAN_AKM_SUITE_PSK 0x000FAC02 -#define WLAN_AKM_SUITE_8021X_SHA256 0x000FAC05 -#define WLAN_AKM_SUITE_PSK_SHA256 0x000FAC06 -#define WLAN_AKM_SUITE_TDLS 0x000FAC07 -#define WLAN_AKM_SUITE_SAE 0x000FAC08 -#define WLAN_AKM_SUITE_FT_OVER_SAE 0x000FAC09 +#define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) +#define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) +#define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) +#define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) +#define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) +#define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) +#define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From 65e251a4634c5644efca6f7e15803f0962d8943d Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 3 Jan 2017 17:34:56 +0000 Subject: iommu: Drop the of_iommu_{set/get}_ops() interface With the introduction of the new iommu_{register/get}_instance() interface in commit e4f10ffe4c9b ("iommu: Make of_iommu_set/get_ops() DT agnostic") (based on struct fwnode_handle as look-up token, so firmware agnostic) to register IOMMU instances with the core IOMMU layer there is no reason to keep the old OF based interface around any longer. Convert all the IOMMU drivers (and OF IOMMU core code) that rely on the of_iommu_{set/get}_ops() to the new kernel interface to register/retrieve IOMMU instances and remove the of_iommu_{set/get}_ops() remaining glue code in order to complete the interface rework. Cc: Matthias Brugger Cc: Will Deacon Cc: Joerg Roedel Cc: Marek Szyprowski Reviewed-by: Robin Murphy Tested-by: Sricharan R Tested-by: Yong Wu Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- include/linux/of_iommu.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 6a7fc5051099..13394ac83c66 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -31,17 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, #endif /* CONFIG_OF_IOMMU */ -static inline void of_iommu_set_ops(struct device_node *np, - const struct iommu_ops *ops) -{ - iommu_register_instance(&np->fwnode, ops); -} - -static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np) -{ - return iommu_get_instance(&np->fwnode); -} - extern struct of_device_id __iommu_of_table; typedef int (*of_iommu_init_fn)(struct device_node *); -- cgit v1.2.3 From 1141d9d08184565b71a943a50ffe712b2dfc697f Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Mon, 23 Jan 2017 12:07:42 -0600 Subject: clk: x86: Add Atom PMC platform clocks The BayTrail and CherryTrail platforms provide platform clocks through their Power Management Controller (PMC). The SoC supports up to 6 clocks (PMC_PLT_CLK[0..5]) with a frequency of either 19.2 MHz (PLL) or 25 MHz (XTAL) for BayTrail and a frequency of 19.2 MHz (XTAL) for CherryTrail. These clocks are available for general system use, where appropriate, and each have Control & Frequency register fields associated with them. Port from legacy by Pierre Bossart, integration in clock framework by Irina Tirdea Signed-off-by: Irina Tirdea Signed-off-by: Pierre-Louis Bossart Acked-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- include/linux/platform_data/x86/clk-pmc-atom.h | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/platform_data/x86/clk-pmc-atom.h (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h new file mode 100644 index 000000000000..3ab892208343 --- /dev/null +++ b/include/linux/platform_data/x86/clk-pmc-atom.h @@ -0,0 +1,44 @@ +/* + * Intel Atom platform clocks for BayTrail and CherryTrail SoC. + * + * Copyright (C) 2016, Intel Corporation + * Author: Irina Tirdea + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __PLATFORM_DATA_X86_CLK_PMC_ATOM_H +#define __PLATFORM_DATA_X86_CLK_PMC_ATOM_H + +/** + * struct pmc_clk - PMC platform clock configuration + * + * @name: identified, typically pmc_plt_clk_, x=[0..5] + * @freq: in Hz, 19.2MHz and 25MHz (Baytrail only) supported + * @parent_name: one of 'xtal' or 'osc' + */ +struct pmc_clk { + const char *name; + unsigned long freq; + const char *parent_name; +}; + +/** + * struct pmc_clk_data - common PMC clock configuration + * + * @base: PMC clock register base offset + * @clks: pointer to set of registered clocks, typically 0..5 + */ +struct pmc_clk_data { + void __iomem *base; + const struct pmc_clk *clks; +}; + +#endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */ -- cgit v1.2.3 From 80a7581f38c0b2e83dc883a2125340b90b5635ec Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Mon, 23 Jan 2017 12:07:43 -0600 Subject: arch/x86/platform/atom: Move pmc_atom to drivers/platform/x86 The pmc_atom driver does not contain any architecture specific code. It only enables the SoC Power Management Controller driver for BayTrail and CherryTrail platforms. Move the pmc_atom driver from arch/x86/platform/atom to drivers/platform/x86. Also clean-up and reorder include files by alphabetical order in pmc_atom.h Signed-off-by: Irina Tirdea Signed-off-by: Pierre-Louis Bossart Acked-by: Thomas Gleixner Acked-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- include/linux/platform_data/x86/pmc_atom.h | 158 +++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 include/linux/platform_data/x86/pmc_atom.h (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h new file mode 100644 index 000000000000..aa8744c77c6d --- /dev/null +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -0,0 +1,158 @@ +/* + * Intel Atom SOC Power Management Controller Header File + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef PMC_ATOM_H +#define PMC_ATOM_H + +/* ValleyView Power Control Unit PCI Device ID */ +#define PCI_DEVICE_ID_VLV_PMC 0x0F1C +/* CherryTrail Power Control Unit PCI Device ID */ +#define PCI_DEVICE_ID_CHT_PMC 0x229C + +/* PMC Memory mapped IO registers */ +#define PMC_BASE_ADDR_OFFSET 0x44 +#define PMC_BASE_ADDR_MASK 0xFFFFFE00 +#define PMC_MMIO_REG_LEN 0x100 +#define PMC_REG_BIT_WIDTH 32 + +/* BIOS uses FUNC_DIS to disable specific function */ +#define PMC_FUNC_DIS 0x34 +#define PMC_FUNC_DIS_2 0x38 + +/* CHT specific bits in FUNC_DIS2 register */ +#define BIT_FD_GMM BIT(3) +#define BIT_FD_ISH BIT(4) + +/* S0ix wake event control */ +#define PMC_S0IX_WAKE_EN 0x3C + +#define BIT_LPC_CLOCK_RUN BIT(4) +#define BIT_SHARED_IRQ_GPSC BIT(5) +#define BIT_ORED_DEDICATED_IRQ_GPSS BIT(18) +#define BIT_ORED_DEDICATED_IRQ_GPSC BIT(19) +#define BIT_SHARED_IRQ_GPSS BIT(20) + +#define PMC_WAKE_EN_SETTING ~(BIT_LPC_CLOCK_RUN | \ + BIT_SHARED_IRQ_GPSC | \ + BIT_ORED_DEDICATED_IRQ_GPSS | \ + BIT_ORED_DEDICATED_IRQ_GPSC | \ + BIT_SHARED_IRQ_GPSS) + +/* The timers acumulate time spent in sleep state */ +#define PMC_S0IR_TMR 0x80 +#define PMC_S0I1_TMR 0x84 +#define PMC_S0I2_TMR 0x88 +#define PMC_S0I3_TMR 0x8C +#define PMC_S0_TMR 0x90 +/* Sleep state counter is in units of of 32us */ +#define PMC_TMR_SHIFT 5 + +/* Power status of power islands */ +#define PMC_PSS 0x98 + +#define PMC_PSS_BIT_GBE BIT(0) +#define PMC_PSS_BIT_SATA BIT(1) +#define PMC_PSS_BIT_HDA BIT(2) +#define PMC_PSS_BIT_SEC BIT(3) +#define PMC_PSS_BIT_PCIE BIT(4) +#define PMC_PSS_BIT_LPSS BIT(5) +#define PMC_PSS_BIT_LPE BIT(6) +#define PMC_PSS_BIT_DFX BIT(7) +#define PMC_PSS_BIT_USH_CTRL BIT(8) +#define PMC_PSS_BIT_USH_SUS BIT(9) +#define PMC_PSS_BIT_USH_VCCS BIT(10) +#define PMC_PSS_BIT_USH_VCCA BIT(11) +#define PMC_PSS_BIT_OTG_CTRL BIT(12) +#define PMC_PSS_BIT_OTG_VCCS BIT(13) +#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14) +#define PMC_PSS_BIT_OTG_VCCA BIT(15) +#define PMC_PSS_BIT_USB BIT(16) +#define PMC_PSS_BIT_USB_SUS BIT(17) + +/* CHT specific bits in PSS register */ +#define PMC_PSS_BIT_CHT_UFS BIT(7) +#define PMC_PSS_BIT_CHT_UXD BIT(11) +#define PMC_PSS_BIT_CHT_UXD_FD BIT(12) +#define PMC_PSS_BIT_CHT_UX_ENG BIT(15) +#define PMC_PSS_BIT_CHT_USB_SUS BIT(16) +#define PMC_PSS_BIT_CHT_GMM BIT(17) +#define PMC_PSS_BIT_CHT_ISH BIT(18) +#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31) + +/* These registers reflect D3 status of functions */ +#define PMC_D3_STS_0 0xA0 + +#define BIT_LPSS1_F0_DMA BIT(0) +#define BIT_LPSS1_F1_PWM1 BIT(1) +#define BIT_LPSS1_F2_PWM2 BIT(2) +#define BIT_LPSS1_F3_HSUART1 BIT(3) +#define BIT_LPSS1_F4_HSUART2 BIT(4) +#define BIT_LPSS1_F5_SPI BIT(5) +#define BIT_LPSS1_F6_XXX BIT(6) +#define BIT_LPSS1_F7_XXX BIT(7) +#define BIT_SCC_EMMC BIT(8) +#define BIT_SCC_SDIO BIT(9) +#define BIT_SCC_SDCARD BIT(10) +#define BIT_SCC_MIPI BIT(11) +#define BIT_HDA BIT(12) +#define BIT_LPE BIT(13) +#define BIT_OTG BIT(14) +#define BIT_USH BIT(15) +#define BIT_GBE BIT(16) +#define BIT_SATA BIT(17) +#define BIT_USB_EHCI BIT(18) +#define BIT_SEC BIT(19) +#define BIT_PCIE_PORT0 BIT(20) +#define BIT_PCIE_PORT1 BIT(21) +#define BIT_PCIE_PORT2 BIT(22) +#define BIT_PCIE_PORT3 BIT(23) +#define BIT_LPSS2_F0_DMA BIT(24) +#define BIT_LPSS2_F1_I2C1 BIT(25) +#define BIT_LPSS2_F2_I2C2 BIT(26) +#define BIT_LPSS2_F3_I2C3 BIT(27) +#define BIT_LPSS2_F4_I2C4 BIT(28) +#define BIT_LPSS2_F5_I2C5 BIT(29) +#define BIT_LPSS2_F6_I2C6 BIT(30) +#define BIT_LPSS2_F7_I2C7 BIT(31) + +#define PMC_D3_STS_1 0xA4 +#define BIT_SMB BIT(0) +#define BIT_OTG_SS_PHY BIT(1) +#define BIT_USH_SS_PHY BIT(2) +#define BIT_DFX BIT(3) + +/* CHT specific bits in PMC_D3_STS_1 register */ +#define BIT_STS_GMM BIT(1) +#define BIT_STS_ISH BIT(2) + +/* PMC I/O Registers */ +#define ACPI_BASE_ADDR_OFFSET 0x40 +#define ACPI_BASE_ADDR_MASK 0xFFFFFE00 +#define ACPI_MMIO_REG_LEN 0x100 + +#define PM1_CNT 0x4 +#define SLEEP_TYPE_MASK 0xFFFFECFF +#define SLEEP_TYPE_S5 0x1C00 +#define SLEEP_ENABLE 0x2000 + +extern int pmc_atom_read(int offset, u32 *value); +extern int pmc_atom_write(int offset, u32 value); + +#endif /* PMC_ATOM_H */ -- cgit v1.2.3 From 2e6f38cef8838a5edd6051c3110ecf06f37ec544 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 23 Jan 2017 12:07:45 -0600 Subject: platform/x86: fix typo in comment s/Acumulate/Accumulate/ Signed-off-by: Pierre-Louis Bossart Acked-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- include/linux/platform_data/x86/pmc_atom.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index aa8744c77c6d..e4905fe69c38 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -50,7 +50,7 @@ BIT_ORED_DEDICATED_IRQ_GPSC | \ BIT_SHARED_IRQ_GPSS) -/* The timers acumulate time spent in sleep state */ +/* The timers accumulate time spent in sleep state */ #define PMC_S0IR_TMR 0x80 #define PMC_S0I1_TMR 0x84 #define PMC_S0I2_TMR 0x88 -- cgit v1.2.3 From 4cb3e3782776f82400a5d135909dda466b813d45 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 25 Jan 2017 21:34:51 +0200 Subject: soc: samsung: pmu: Remove unused and duplicated defines The exynos-regs-pmu.h was never a complete list of PMU registers. It contained a lot of holes for registers which were not used. However, a lot of unused defines came along with porting the code from vendor kernel. Few of defines were also duplicated. Remove them so the file will be slightly smaller. Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 72 +++-------------------------- 1 file changed, 7 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index d30186e2b609..9793502a6a57 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -7,7 +7,13 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. -*/ + * + * + * Notice: + * This is not a list of all Exynos Power Management Unit SFRs. + * There are too many of them, not mentioning subtle differences + * between SoCs. For now, put here only the used registers. + */ #ifndef __LINUX_SOC_EXYNOS_REGS_PMU_H #define __LINUX_SOC_EXYNOS_REGS_PMU_H __FILE__ @@ -38,7 +44,6 @@ #define EXYNOS_CORE_PO_RESET(n) ((1 << 4) << n) #define EXYNOS_WAKEUP_FROM_LOWPWR (1 << 28) #define EXYNOS_SWRESET 0x0400 -#define EXYNOS5440_SWRESET 0x00C4 #define S5P_WAKEUP_STAT 0x0600 #define S5P_EINT_WAKEUP_MASK 0x0604 @@ -136,12 +141,6 @@ #define EXYNOS_COMMON_OPTION(_nr) \ (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) -#define EXYNOS_CORE_LOCAL_PWR_EN 0x3 - -#define EXYNOS_ARM_COMMON_STATUS 0x2504 -#define EXYNOS_COMMON_OPTION(_nr) \ - (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x8) - #define EXYNOS_ARM_L2_CONFIGURATION 0x2600 #define EXYNOS_L2_CONFIGURATION(_nr) \ (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80)) @@ -149,18 +148,10 @@ (EXYNOS_L2_CONFIGURATION(_nr) + 0x4) #define EXYNOS_L2_OPTION(_nr) \ (EXYNOS_L2_CONFIGURATION(_nr) + 0x8) -#define EXYNOS_L2_COMMON_PWR_EN 0x3 - -#define EXYNOS_ARM_CORE_X_STATUS_OFFSET 0x4 - -#define EXYNOS5_APLL_SYSCLK_CONFIGURATION 0x2A00 -#define EXYNOS5_APLL_SYSCLK_STATUS 0x2A04 #define EXYNOS5_ARM_L2_OPTION 0x2608 #define EXYNOS5_USE_RETENTION BIT(4) -#define EXYNOS5_L2RSTDISABLE_VALUE BIT(3) - #define S5P_PAD_RET_MAUDIO_OPTION 0x3028 #define S5P_PAD_RET_MMC2_OPTION 0x30c8 #define S5P_PAD_RET_GPIO_OPTION 0x3108 @@ -411,7 +402,6 @@ #define EXYNOS5_SATA_MEM_SYS_PWR_REG 0x11FC #define EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1200 #define EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG 0x1204 -#define EXYNOS5_PAD_RETENTION_EFNAND_SYS_PWR_REG 0x1208 #define EXYNOS5_PAD_RETENTION_GPIO_SYS_PWR_REG 0x1220 #define EXYNOS5_PAD_RETENTION_UART_SYS_PWR_REG 0x1224 #define EXYNOS5_PAD_RETENTION_MMCA_SYS_PWR_REG 0x1228 @@ -485,7 +475,6 @@ #define EXYNOS5420_SWRESET_KFC_SEL 0x3 /* Only for EXYNOS5420 */ -#define EXYNOS5420_ISP_ARM_OPTION 0x2488 #define EXYNOS5420_L2RSTDISABLE_VALUE BIT(3) #define EXYNOS5420_LPI_MASK 0x0004 @@ -494,9 +483,6 @@ #define EXYNOS5420_ATB_KFC BIT(13) #define EXYNOS5420_ATB_ISP_ARM BIT(19) #define EXYNOS5420_EMULATION BIT(31) -#define ATB_ISP_ARM BIT(12) -#define ATB_KFC BIT(13) -#define ATB_NOC BIT(14) #define EXYNOS5420_ARM_INTR_SPREAD_ENABLE 0x0100 #define EXYNOS5420_ARM_INTR_SPREAD_USE_STANDBYWFI 0x0104 @@ -510,11 +496,6 @@ #define EXYNOS5420_KFC_CORE_RESET(_nr) \ ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) -#define EXYNOS5420_BB_CON1 0x0784 -#define EXYNOS5420_BB_SEL_EN BIT(31) -#define EXYNOS5420_BB_PMOS_EN BIT(7) -#define EXYNOS5420_BB_1300X 0XF - #define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG 0x1028 @@ -546,15 +527,6 @@ #define EXYNOS5420_SPLL_SYSCLK_SYS_PWR_REG 0x1178 #define EXYNOS5420_INTRAM_MEM_SYS_PWR_REG 0x11B8 #define EXYNOS5420_INTROM_MEM_SYS_PWR_REG 0x11BC -#define EXYNOS5420_ONENANDXL_MEM_SYS_PWR 0x11C0 -#define EXYNOS5420_USBDEV_MEM_SYS_PWR 0x11CC -#define EXYNOS5420_USBDEV1_MEM_SYS_PWR 0x11D0 -#define EXYNOS5420_SDMMC_MEM_SYS_PWR 0x11D4 -#define EXYNOS5420_CSSYS_MEM_SYS_PWR 0x11D8 -#define EXYNOS5420_SECSS_MEM_SYS_PWR 0x11DC -#define EXYNOS5420_ROTATOR_MEM_SYS_PWR 0x11E0 -#define EXYNOS5420_INTRAM_MEM_SYS_PWR 0x11E4 -#define EXYNOS5420_INTROM_MEM_SYS_PWR 0x11E8 #define EXYNOS5420_PAD_RETENTION_JTAG_SYS_PWR_REG 0x1208 #define EXYNOS5420_PAD_RETENTION_DRAM_SYS_PWR_REG 0x1210 #define EXYNOS5420_PAD_RETENTION_UART_SYS_PWR_REG 0x1214 @@ -605,13 +577,7 @@ #define EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG 0x159C #define EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG 0x15A0 #define EXYNOS5420_SFR_AXI_CGDIS1 0x15E4 -#define EXYNOS_ARM_CORE2_CONFIGURATION 0x2100 -#define EXYNOS5420_ARM_CORE2_OPTION 0x2108 -#define EXYNOS_ARM_CORE3_CONFIGURATION 0x2180 -#define EXYNOS5420_ARM_CORE3_OPTION 0x2188 -#define EXYNOS5420_ARM_COMMON_STATUS 0x2504 #define EXYNOS5420_ARM_COMMON_OPTION 0x2508 -#define EXYNOS5420_KFC_COMMON_STATUS 0x2584 #define EXYNOS5420_KFC_COMMON_OPTION 0x2588 #define EXYNOS5420_LOGIC_RESET_DURATION3 0x2D1C @@ -626,33 +592,9 @@ #define EXYNOS_PAD_RET_DRAM_OPTION 0x3008 #define EXYNOS_PAD_RET_MAUDIO_OPTION 0x3028 #define EXYNOS_PAD_RET_JTAG_OPTION 0x3048 -#define EXYNOS_PAD_RET_GPIO_OPTION 0x3108 -#define EXYNOS_PAD_RET_UART_OPTION 0x3128 -#define EXYNOS_PAD_RET_MMCA_OPTION 0x3148 -#define EXYNOS_PAD_RET_MMCB_OPTION 0x3168 #define EXYNOS_PAD_RET_EBIA_OPTION 0x3188 #define EXYNOS_PAD_RET_EBIB_OPTION 0x31A8 -#define EXYNOS_PS_HOLD_CONTROL 0x330C - -/* For SYS_PWR_REG */ -#define EXYNOS_SYS_PWR_CFG BIT(0) - -#define EXYNOS5420_MFC_CONFIGURATION 0x4060 -#define EXYNOS5420_MFC_STATUS 0x4064 -#define EXYNOS5420_MFC_OPTION 0x4068 -#define EXYNOS5420_G3D_CONFIGURATION 0x4080 -#define EXYNOS5420_G3D_STATUS 0x4084 -#define EXYNOS5420_G3D_OPTION 0x4088 -#define EXYNOS5420_DISP0_CONFIGURATION 0x40A0 -#define EXYNOS5420_DISP0_STATUS 0x40A4 -#define EXYNOS5420_DISP0_OPTION 0x40A8 -#define EXYNOS5420_DISP1_CONFIGURATION 0x40C0 -#define EXYNOS5420_DISP1_STATUS 0x40C4 -#define EXYNOS5420_DISP1_OPTION 0x40C8 -#define EXYNOS5420_MAU_CONFIGURATION 0x40E0 -#define EXYNOS5420_MAU_STATUS 0x40E4 -#define EXYNOS5420_MAU_OPTION 0x40E8 #define EXYNOS5420_FSYS2_OPTION 0x4168 #define EXYNOS5420_PSGEN_OPTION 0x4188 -- cgit v1.2.3 From ee55ae6194a5439bde3a3b8ee0abda63c610e740 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 25 Jan 2017 21:09:44 +0200 Subject: soc: samsung: pmu: Remove duplicated define for ARM_L2_OPTION register The register ARM_L2_OPTION (0x2608 in Exynos4 and Exynos5 PMU) was defined twice. Both names were used in the Exynos542x code. Simplify this. Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 9793502a6a57..9786c62d7159 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -149,8 +149,7 @@ #define EXYNOS_L2_OPTION(_nr) \ (EXYNOS_L2_CONFIGURATION(_nr) + 0x8) -#define EXYNOS5_ARM_L2_OPTION 0x2608 -#define EXYNOS5_USE_RETENTION BIT(4) +#define EXYNOS_L2_USE_RETENTION BIT(4) #define S5P_PAD_RET_MAUDIO_OPTION 0x3028 #define S5P_PAD_RET_MMC2_OPTION 0x30c8 -- cgit v1.2.3 From 4f497a4310646f14ab518ab6108dbef07f4606af Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Thu, 15 Dec 2016 11:15:25 +0100 Subject: PM / QoS: Remove unneeded linux/miscdevice.h include pm_qos.h does not use any miscdevice, so this patch remove this unnecessary inclusion. Signed-off-by: Corentin Labbe Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 0f65d36c2a75..d4d34791e463 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -6,7 +6,6 @@ */ #include #include -#include #include #include -- cgit v1.2.3 From 3aa26a3b2ea63c4d09420e74421370655aa1cf8f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 2 Jan 2017 14:41:02 +0530 Subject: PM / OPP: Rename dev_pm_opp_get_suspend_opp() and return OPP rate There is only one user of dev_pm_opp_get_suspend_opp() and that uses it to get the OPP rate for the suspend_opp. Rename dev_pm_opp_get_suspend_opp() as dev_pm_opp_get_suspend_opp_freq() and return the rate directly from it. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 0edd88f93904..7483ff291a8e 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -88,7 +88,7 @@ int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev); unsigned long dev_pm_opp_get_max_transition_latency(struct device *dev); -struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev); +unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, @@ -159,9 +159,9 @@ static inline unsigned long dev_pm_opp_get_max_transition_latency(struct device return 0; } -static inline struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device *dev) +static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev) { - return NULL; + return 0; } static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, -- cgit v1.2.3 From dc2c9ad52af45ebecf9743aacb1916ebb2f1e848 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 2 Jan 2017 14:41:03 +0530 Subject: PM / OPP: Don't expose srcu_head to register notifiers Let the OPP core provide helpers to register notifiers for any device, instead of exposing srcu_head outside of the core. Signed-off-by: Viresh Kumar Acked-by: MyungJoo Ham Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 7483ff291a8e..66a02deeb03f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -108,7 +108,9 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_disable(struct device *dev, unsigned long freq); -struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev); +int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb); +int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb); + int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); void dev_pm_opp_put_supported_hw(struct device *dev); @@ -202,10 +204,14 @@ static inline int dev_pm_opp_disable(struct device *dev, unsigned long freq) return 0; } -static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( - struct device *dev) +static inline int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb) { - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; +} + +static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb) +{ + return -ENOTSUPP; } static inline int dev_pm_opp_set_supported_hw(struct device *dev, -- cgit v1.2.3 From 6a4bc2b4c3c9afd643e8da44dd2318a5c5f22cd0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 26 Jan 2017 14:44:17 -0800 Subject: net: Fix ndo_setup_tc comment Commit 16e5cc647173 ("net: rework setup_tc ndo op to consume general tc operand") changed the ndo_setup_tc() signature, but did not update the comments in netdevice.h, so do that now. Signed-off-by: Florian Fainelli Acked-by: John Fastabend Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3868c32d98af..d63cacb67ea6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -964,11 +964,12 @@ struct netdev_xdp { * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); - * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) - * Called to setup 'tc' number of traffic classes in the net device. This - * is always called from the stack with the rtnl lock held and netif tx - * queues stopped. This allows the netdevice to perform queue management - * safely. + * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, + * __be16 protocol, struct tc_to_netdev *tc); + * Called to setup any 'tc' scheduler, classifier or action on @dev. + * This is always called from the stack with the rtnl lock held and netif + * tx queues stopped. This allows the netdevice to perform queue + * management safely. * * Fiber Channel over Ethernet (FCoE) offload functions. * int (*ndo_fcoe_enable)(struct net_device *dev); -- cgit v1.2.3 From 07e4fead45e6e1932f0b960655ab554b6aab6a08 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 08:06:40 -0800 Subject: blk-mq: create debugfs directory tree In preparation for putting blk-mq debugging information in debugfs, create a directory tree mirroring the one in sysfs: # tree -d /sys/kernel/debug/block /sys/kernel/debug/block |-- nvme0n1 | `-- mq | |-- 0 | | `-- cpu0 | |-- 1 | | `-- cpu1 | |-- 2 | | `-- cpu2 | `-- 3 | `-- cpu3 `-- vda `-- mq `-- 0 |-- cpu0 |-- cpu1 |-- cpu2 `-- cpu3 Also add the scaffolding for the actual files that will go in here, either under the hardware queue or software queue directories. Reviewed-by: Hannes Reinecke Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25564857f5f8..0ee283f3cffe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,6 +571,11 @@ struct request_queue { struct list_head tag_set_list; struct bio_set *bio_split; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *mq_debugfs_dir; +#endif + bool mq_sysfs_init_done; }; -- cgit v1.2.3 From 24af1ccfe12adddbe17d11801e1689791a4cc282 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 25 Jan 2017 14:32:13 -0800 Subject: sbitmap: add helpers for dumping to a seq_file This is useful debugging information that will be used in the blk-mq debugfs directory. Reviewed-by: Hannes Reinecke Signed-off-by: Omar Sandoval Changed 'weight' to 'busy'. Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index f017fd6e69c4..d4e0a204c118 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -258,6 +258,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) unsigned int sbitmap_weight(const struct sbitmap *sb); +/** + * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. + * @sb: Bitmap to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The format may change at any time. + */ +void sbitmap_show(struct sbitmap *sb, struct seq_file *m); + +/** + * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct + * seq_file. + * @sb: Bitmap to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The output isn't guaranteed to be internally + * consistent. + */ +void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); + /** * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific * memory node. @@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, */ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); +/** + * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct + * seq_file. + * @sbq: Bitmap queue to show. + * @m: struct seq_file to write to. + * + * This is intended for debugging. The format may change at any time. + */ +void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); + #endif /* __LINUX_SCALE_BITMAP_H */ -- cgit v1.2.3 From 50e1dab86aa2c10cbca2f754aae9542169403141 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Jan 2017 14:42:34 -0700 Subject: blk-mq-sched: fix starvation for multiple hardware queues and shared tags If we have both multiple hardware queues and shared tag map between devices, we need to ensure that we propagate the hardware queue restart bit higher up. This is because we can get into a situation where we don't have any IO pending on a hardware queue, yet we fail getting a tag to start new IO. If that happens, it's not enough to mark the hardware queue as needing a restart, we need to bubble that up to the higher level queue as well. Signed-off-by: Jens Axboe Reviewed-by: Omar Sandoval Tested-by: Hannes Reinecke --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0ee283f3cffe..883b8abe4305 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -607,6 +607,7 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ +#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From c13660a08c8b3bb49def4374bfd414aaaa564662 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Jan 2017 12:40:07 -0700 Subject: blk-mq-sched: change ->dispatch_requests() to ->dispatch_request() When we invoke dispatch_requests(), the scheduler empties everything into the passed in list. This isn't always a good thing, since it means that we remove items that we could have potentially merged with. Change the function to dispatch single requests at the time. If we do that, we can backoff exactly at the point where the device can't consume more IO, and leave the rest with the scheduler for better merging and future dispatch decision making. Signed-off-by: Jens Axboe Reviewed-by: Omar Sandoval Tested-by: Hannes Reinecke --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index ecb96fd67c6d..b5825c4f06f7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -92,7 +92,7 @@ struct elevator_mq_ops { struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); void (*put_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); - void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *); + struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); void (*started_request)(struct request *); -- cgit v1.2.3 From d35a00b8e33dab7385f724e713ae71c8be0a49f4 Mon Sep 17 00:00:00 2001 From: Felix Jia Date: Thu, 26 Jan 2017 16:59:17 +1300 Subject: net/ipv6: allow sysctl to change link-local address generation mode The address generation mode for IPv6 link-local can only be configured by netlink messages. This patch adds the ability to change the address generation mode via sysctl. v1 -> v2 Removed the rtnl lock and switch to use RCU lock to iterate through the netdev list. v2 -> v3 Removed the addrgenmode variable from the idev structure and use the systcl storage for the flag. Simplifed the logic for sysctl handling by removing the supported for all operation. Added support for more types of tunnel interfaces for link-local address generation. Based the patches from net-next. v3 -> v4 Removed unnecessary whitespace changes. Signed-off-by: Felix Jia Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 671d014e6429..71be5b330d21 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -69,6 +69,7 @@ struct ipv6_devconf { __s32 seg6_require_hmac; #endif __u32 enhanced_dad; + __u32 addr_gen_mode; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From f73f44eb00cb136990cfb7d40e436c13d7669ec8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 08:30:47 -0700 Subject: block: add a op_is_flush helper This centralizes the checks for bios that needs to be go into the flush state machine. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0e5b1cd5113c..37c9a43c5e78 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,6 +220,15 @@ static inline bool op_is_write(unsigned int op) return (op & 1); } +/* + * Check if the bio or request is one that needs special treatment in the + * flush state machine. + */ +static inline bool op_is_flush(unsigned int op) +{ + return op & (REQ_FUA | REQ_PREFLUSH); +} + /* * Reads are always treated as synchronous, as are requests with the FUA or * PREFLUSH flag. Other operations may be marked as synchronous using the -- cgit v1.2.3 From 9d162ed69f51cbd9ee5a0c7e82aba7acc96362ff Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 27 Jan 2017 08:46:23 +0100 Subject: net: phy: micrel: add support for KSZ8795 This is adds support for the PHYs in the KSZ8795 5port managed switch. It will allow to detect the link between the switch and the soc and uses the same read_status functions as the KSZ8873MLL switch. Signed-off-by: Sean Nyekjaer Signed-off-by: David S. Miller --- include/linux/micrel_phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 257173e0095e..f541da68d1e7 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -35,6 +35,8 @@ #define PHY_ID_KSZ886X 0x00221430 #define PHY_ID_KSZ8863 0x00221435 +#define PHY_ID_KSZ8795 0x00221550 + /* struct phy_device dev_flags definitions */ #define MICREL_PHY_50MHZ_CLK 0x00000001 #define MICREL_PHY_FXEN 0x00000002 -- cgit v1.2.3 From 0fc9ae107669760c2a8658cb5b5876dbe525e08d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 27 Jan 2017 14:07:01 +0100 Subject: net: phy: broadcom: add support for BCM54210E MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's Broadcom PHY simply described as single-port RGMII 10/100/1000BASE-T PHY. It requires disabling delay skew and GTXCLK bits. Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index cf93f1399d3e..5881d1fdc1fb 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -17,6 +17,7 @@ #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 +#define PHY_ID_BCM54210E 0x600d84a0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM54612E 0x03625e60 -- cgit v1.2.3 From 2b368b234ec43abbf46f2983478c438d36e58134 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 27 Jan 2017 15:26:32 +0100 Subject: net: wan: Remove unused stats member from struct frad_local The stats member of struct frad_locl is used neither by the dlci nor the sdla driver, so it might as well be removed. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- include/linux/if_frad.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 4316aa173dde..46df7e565d6f 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -66,8 +66,6 @@ struct dlci_local struct frad_local { - struct net_device_stats stats; - /* devices which this FRAD is slaved to */ struct net_device *master[CONFIG_DLCI_MAX]; short dlci[CONFIG_DLCI_MAX]; -- cgit v1.2.3 From f617f27653c4d9f5b2aa43d567ac0405df889944 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 27 Jan 2017 15:02:26 +0000 Subject: net: implement netif_cond_dbg macro For reporting things that may or may not be serious, depending on some condition, netif_cond_dbg will check the condition and print the report at either dbg (if the condition is true) or the specified level. Suggested-by: Jon Cooper Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d63cacb67ea6..9511e5a1cfc6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4347,6 +4347,15 @@ do { \ }) #endif +/* if @cond then downgrade to debug, else print at @level */ +#define netif_cond_dbg(priv, type, netdev, cond, level, fmt, args...) \ + do { \ + if (cond) \ + netif_dbg(priv, type, netdev, fmt, ##args); \ + else \ + netif_ ## level(priv, type, netdev, fmt, ##args); \ + } while (0) + #if defined(VERBOSE_DEBUG) #define netif_vdbg netif_dbg #else -- cgit v1.2.3 From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2017 19:32:01 +0100 Subject: random: use chacha20 for get_random_int/long Now that our crng uses chacha20, we can rely on its speedy characteristics for replacing MD5, while simultaneously achieving a higher security guarantee. Before the idea was to use these functions if you wanted random integers that aren't stupidly insecure but aren't necessarily secure either, a vague gray zone, that hopefully was "good enough" for its users. With chacha20, we can strengthen this claim, since either we're using an rdrand-like instruction, or we're using the same crng as /dev/urandom. And it's faster than what was before. We could have chosen to replace this with a SipHash-derived function, which might be slightly faster, but at the cost of having yet another RNG construction in the kernel. By moving to chacha20, we have a single RNG to analyze and verify, and we also already get good performance improvements on all platforms. Implementation-wise, rather than use a generic buffer for both get_random_int/long and memcpy based on the size needs, we use a specific buffer for 32-bit reads and for 64-bit reads. This way, we're guaranteed to always have aligned accesses on all platforms. While slightly more verbose in C, the assembly this generates is a lot simpler than otherwise. Finally, on 32-bit platforms where longs and ints are the same size, we simply alias get_random_int to get_random_long. Signed-off-by: Jason A. Donenfeld Suggested-by: Theodore Ts'o Cc: Theodore Ts'o Cc: Hannes Frederic Sowa Cc: Andy Lutomirski Signed-off-by: Theodore Ts'o --- include/linux/random.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef..16ab429735a7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; -- cgit v1.2.3 From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 22 Jan 2017 16:34:08 +0100 Subject: random: convert get_random_int/long into get_random_u32/u64 Many times, when a user wants a random number, he wants a random number of a guaranteed size. So, thinking of get_random_int and get_random_long in terms of get_random_u32 and get_random_u64 makes it much easier to achieve this. It also makes the code simpler. On 32-bit platforms, get_random_int and get_random_long are both aliased to get_random_u32. On 64-bit platforms, int->u32 and long->u64. Signed-off-by: Jason A. Donenfeld Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Signed-off-by: Theodore Ts'o --- include/linux/random.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 16ab429735a7..ed5c3838780d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes); extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From b18b6a9cef7f30e9a8b7738d5fc8d568cf660855 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 21 Jan 2017 00:09:08 -0500 Subject: timers: Omit POSIX timer stuff from task_struct when disabled When CONFIG_POSIX_TIMERS is disabled, it is preferable to remove related structures from struct task_struct and struct signal_struct as they won't contain anything useful and shouldn't be relied upon by mistake. Code still referencing those structures is also disabled here. Signed-off-by: Nicolas Pitre Signed-off-by: John Stultz --- include/linux/init_task.h | 40 +++++++++++++++++++++++++--------------- include/linux/sched.h | 13 ++++++++++--- 2 files changed, 35 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649d77ff..3a85d61f7614 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -42,6 +42,27 @@ extern struct fs_struct init_fs; #define INIT_PREV_CPUTIME(x) #endif +#ifdef CONFIG_POSIX_TIMERS +#define INIT_POSIX_TIMERS(s) \ + .posix_timers = LIST_HEAD_INIT(s.posix_timers), +#define INIT_CPU_TIMERS(s) \ + .cpu_timers = { \ + LIST_HEAD_INIT(s.cpu_timers[0]), \ + LIST_HEAD_INIT(s.cpu_timers[1]), \ + LIST_HEAD_INIT(s.cpu_timers[2]), \ + }, +#define INIT_CPUTIMER(s) \ + .cputimer = { \ + .cputime_atomic = INIT_CPUTIME_ATOMIC, \ + .running = false, \ + .checking_timer = false, \ + }, +#else +#define INIT_POSIX_TIMERS(s) +#define INIT_CPU_TIMERS(s) +#define INIT_CPUTIMER(s) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -49,14 +70,10 @@ extern struct fs_struct init_fs; .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ .signal = {{0}}}, \ - .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ - .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ + INIT_POSIX_TIMERS(sig) \ + INIT_CPU_TIMERS(sig) \ .rlim = INIT_RLIMITS, \ - .cputimer = { \ - .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = false, \ - .checking_timer = false, \ - }, \ + INIT_CPUTIMER(sig) \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ @@ -247,7 +264,7 @@ extern struct task_group root_task_group; .blocked = {{0}}, \ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ - .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + INIT_CPU_TIMERS(tsk) \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ @@ -274,13 +291,6 @@ extern struct task_group root_task_group; } -#define INIT_CPU_TIMERS(cpu_timers) \ -{ \ - LIST_HEAD_INIT(cpu_timers[0]), \ - LIST_HEAD_INIT(cpu_timers[1]), \ - LIST_HEAD_INIT(cpu_timers[2]), \ -} - /* Attach to the init_task data structure for proper alignment */ #define __init_task_data __attribute__((__section__(".data..init_task"))) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d1905245c7a..e8f6af59726d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,13 +734,14 @@ struct signal_struct { unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; +#ifdef CONFIG_POSIX_TIMERS + /* POSIX.1b Interval Timers */ int posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; - struct pid *leader_pid; ktime_t it_real_incr; /* @@ -759,12 +760,16 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif - struct list_head cpu_timers[3]; - struct pid *tty_old_pgrp; /* boolean value for session group leader */ @@ -1681,8 +1686,10 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; +#ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#endif /* process credentials */ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ -- cgit v1.2.3 From 5ea708d15a928f7a479987704203616d3274c03b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jan 2017 14:52:44 +0300 Subject: block: simplify blk_init_allocated_queue Return an errno value instead of the passed in queue so that the callers don't have to keep track of two queues, and move the assignment of the request_fn and lock to the caller as passing them as argument doesn't simplify anything. While we're at it also remove two pointless NULL assignments, given that the request structure is zeroed on allocation. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 05675b1dfd20..6b1efc5760ea 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1137,8 +1137,7 @@ extern void blk_unprep_request(struct request *); extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern struct request_queue *blk_init_allocated_queue(struct request_queue *, - request_fn_proc *, spinlock_t *); +extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -- cgit v1.2.3 From 6d247d7f71d1fa4b66a5f4da7b1daa21510d529b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 09:51:45 -0700 Subject: block: allow specifying size for extra command data This mirrors the blk-mq capabilities to allocate extra drivers-specific data behind struct request by setting a cmd_size field, as well as having a constructor / destructor for it. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b1efc5760ea..461b7cf6af1d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -273,6 +273,8 @@ typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); typedef int (bsg_job_fn) (struct bsg_job *); +typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); +typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -408,6 +410,8 @@ struct request_queue { rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + init_rq_fn *init_rq_fn; + exit_rq_fn *exit_rq_fn; const struct blk_mq_ops *mq_ops; @@ -577,6 +581,9 @@ struct request_queue { #endif bool mq_sysfs_init_done; + + size_t cmd_size; + void *rq_alloc_data; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From 48b77ad6084481ef9330a5d2bee289966da0975b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 09:35:54 +0100 Subject: block: cleanup tracing A couple tweaks to the tracing code: - trace the request size for all requests - trace request sector and nr_sectors only for fs requests, enforced by helpers - drop SCSI CDB tracing - we have SCSI tracing for this and are going to me the CDB out of the generic struct request soon. With this the tracing code stops to know about BLOCK_PC requests entirely, it's just FS vs passthrough requests now, where the latter includes any driver-private requests. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index e417f080219a..a143a67a6f33 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -110,16 +110,16 @@ struct compat_blk_user_trace_setup { #endif -#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) +extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); -static inline int blk_cmd_buf_len(struct request *rq) +static inline sector_t blk_rq_trace_sector(struct request *rq) { - return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1; + return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_pos(rq); } -extern void blk_dump_cmd(char *buf, struct request *rq); -extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); - -#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ +static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) +{ + return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_sectors(rq); +} #endif -- cgit v1.2.3 From eb8db831be80692bf4bda3dfc55001daf64ec299 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 22 Jan 2017 18:32:46 +0100 Subject: dm: always defer request allocation to the owner of the request_queue DM already calls blk_mq_alloc_request on the request_queue of the underlying device if it is a blk-mq device. But now that we allow drivers to allocate additional data and initialize it ahead of time we need to do the same for all drivers. Doing so and using the new cmd_size infrastructure in the block layer greatly simplifies the dm-rq and mpath code, and should also make arbitrary combinations of SQ and MQ devices with SQ or MQ device mapper tables easily possible as a further step. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/device-mapper.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ef7962e84444..a7e6903866fd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); * = 2: The target wants to push back the io */ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); -typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, - union map_info *map_context); typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti, struct request *rq, union map_info *map_context, @@ -163,7 +161,6 @@ struct target_type { dm_ctr_fn ctr; dm_dtr_fn dtr; dm_map_fn map; - dm_map_request_fn map_rq; dm_clone_and_map_request_fn clone_and_map_rq; dm_release_clone_request_fn release_clone_rq; dm_endio_fn end_io; -- cgit v1.2.3 From 8ae94eb65be9425af4d57a4f4cfebfdf03081e93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Jan 2017 15:25:02 +0300 Subject: block/bsg: move queue creation into bsg_setup_queue Simply the boilerplate code needed for bsg nodes a bit. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 657a718c27d2..e34dde2da0ef 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -66,9 +66,8 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); -int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, - bsg_job_fn *job_fn, int dd_job_size); -void bsg_request_fn(struct request_queue *q); +struct request_queue *bsg_setup_queue(struct device *dev, char *name, + bsg_job_fn *job_fn, int dd_job_size); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From 82ed4db499b8598f16f8871261bff088d6b0597f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 09:46:29 +0100 Subject: block: split scsi_request out of struct request And require all drivers that want to support BLOCK_PC to allocate it as the first thing of their private data. To support this the legacy IDE and BSG code is switched to set cmd_size on their queues to let the block layer allocate the additional space. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 ------------- include/linux/ide.h | 8 +++++++- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 461b7cf6af1d..e4c5f284fe2d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -128,8 +128,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) -#define BLK_MAX_CDB 16 - /* * Try to put the fields that are referenced together in the same cacheline. * @@ -227,17 +225,7 @@ struct request { int errors; - /* - * when request is used as a packet command carrier - */ - unsigned char __cmd[BLK_MAX_CDB]; - unsigned char *cmd; - unsigned short cmd_len; - unsigned int extra_len; /* length of alignment and padding */ - unsigned int sense_len; - unsigned int resid_len; /* residual count */ - void *sense; unsigned long deadline; struct list_head timeout_list; @@ -925,7 +913,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); -extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, diff --git a/include/linux/ide.h b/include/linux/ide.h index a633898f36ac..086fbe172817 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -20,6 +20,7 @@ #include /* for request_sense */ #include +#include #include #include @@ -52,6 +53,11 @@ enum ata_cmd_type_bits { ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) +struct ide_request { + struct scsi_request sreq; + u8 sense[SCSI_SENSE_BUFFERSIZE]; +}; + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -579,7 +585,7 @@ struct ide_drive_s { /* current sense rq and buffer */ bool sense_rq_armed; - struct request sense_rq; + struct request *sense_rq; struct request_sense sense_data; }; -- cgit v1.2.3 From 15f2e88ddd4bc9b2c6b6236162993b5caa80abb9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 14:46:09 -0500 Subject: radix tree: Add some implicit includes We were using spinlock_t and INIT_LIST_HEAD without including spinlock.h or list.h. They were being implicitly included through some other header file, but that's fragile. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 52bda854593b..13d8d741ca34 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -22,11 +22,13 @@ #define _LINUX_RADIX_TREE_H #include -#include -#include #include #include +#include +#include #include +#include +#include /* * The bottom two bits of the slot determine how the remaining bits in the -- cgit v1.2.3 From 35534c869c62f59203c1822769bbef14e894a9e9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 19 Dec 2016 17:43:19 -0500 Subject: radix tree: constify some pointers If we're just getting the value of a tag, or looking up an entry, we won't modify the radix tree, so we can declare these functions as taking a const pointer. Mostly for documentation purposes, though it might help code generation. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 13d8d741ca34..32683c7c2e0d 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -125,7 +125,7 @@ do { \ (root)->rnode = NULL; \ } while (0) -static inline bool radix_tree_empty(struct radix_tree_root *root) +static inline bool radix_tree_empty(const struct radix_tree_root *root) { return root->rnode == NULL; } @@ -294,10 +294,10 @@ static inline int radix_tree_insert(struct radix_tree_root *root, { return __radix_tree_insert(root, index, 0, entry); } -void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, +void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void ***slotp); -void *radix_tree_lookup(struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); +void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); +void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long); typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, @@ -316,10 +316,10 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, struct radix_tree_node *node, void **slot); -unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, +unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); -unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, +unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); @@ -330,19 +330,19 @@ void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag); -int radix_tree_tag_get(struct radix_tree_root *root, +int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); void radix_tree_iter_tag_set(struct radix_tree_root *root, const struct radix_tree_iter *iter, unsigned int tag); unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, +radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); unsigned int -radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) { @@ -395,7 +395,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ -void **radix_tree_next_chunk(struct radix_tree_root *root, +void **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** -- cgit v1.2.3 From 966d2b04e070bc040319aaebfec09e0144dc3341 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Sat, 28 Jan 2017 06:42:20 -0600 Subject: percpu-refcount: fix reference leak during percpu-atomic transition percpu_ref_tryget() and percpu_ref_tryget_live() should return "true" IFF they acquire a reference. But the return value from atomic_long_inc_not_zero() is a long and may have high bits set, e.g. PERCPU_COUNT_BIAS, and the return value of the tryget routines is bool so the reference may actually be acquired but the routines return "false" which results in a reference leak since the caller assumes it does not need to do a corresponding percpu_ref_put(). This was seen when performing CPU hotplug during I/O, as hangs in blk_mq_freeze_queue_wait where percpu_ref_kill (blk_mq_freeze_queue_start) raced with percpu_ref_tryget (blk_mq_timeout_work). Sample stack trace: __switch_to+0x2c0/0x450 __schedule+0x2f8/0x970 schedule+0x48/0xc0 blk_mq_freeze_queue_wait+0x94/0x120 blk_mq_queue_reinit_work+0xb8/0x180 blk_mq_queue_reinit_prepare+0x84/0xa0 cpuhp_invoke_callback+0x17c/0x600 cpuhp_up_callbacks+0x58/0x150 _cpu_up+0xf0/0x1c0 do_cpu_up+0x120/0x150 cpu_subsys_online+0x64/0xe0 device_online+0xb4/0x120 online_store+0xb4/0xc0 dev_attr_store+0x68/0xa0 sysfs_kf_write+0x80/0xb0 kernfs_fop_write+0x17c/0x250 __vfs_write+0x6c/0x1e0 vfs_write+0xd0/0x270 SyS_write+0x6c/0x110 system_call+0x38/0xe0 Examination of the queue showed a single reference (no PERCPU_COUNT_BIAS, and __PERCPU_REF_DEAD, __PERCPU_REF_ATOMIC set) and no requests. However, conditions at the time of the race are count of PERCPU_COUNT_BIAS + 0 and __PERCPU_REF_DEAD and __PERCPU_REF_ATOMIC set. The fix is to make the tryget routines use an actual boolean internally instead of the atomic long result truncated to a int. Fixes: e625305b3907 percpu-refcount: make percpu_ref based on longs instead of ints Link: https://bugzilla.kernel.org/show_bug.cgi?id=190751 Signed-off-by: Douglas Miller Reviewed-by: Jens Axboe Signed-off-by: Tejun Heo Fixes: e625305b3907 ("percpu-refcount: make percpu_ref based on longs instead of ints") Cc: stable@vger.kernel.org # v3.18+ --- include/linux/percpu-refcount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 1c7eec09e5eb..3a481a49546e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -204,7 +204,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) static inline bool percpu_ref_tryget(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret; + bool ret; rcu_read_lock_sched(); @@ -238,7 +238,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) { unsigned long __percpu *percpu_count; - int ret = false; + bool ret = false; rcu_read_lock_sched(); -- cgit v1.2.3 From d732248fdb5c5434f2ab0c258ce25a7e2ff2521a Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 24 Jan 2017 14:41:41 +0100 Subject: iio: cros_ec: Add cros_ec barometer driver Handle the barometer sensor presented by the ChromeOS EC Sensor hub. Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo Serra Signed-off-by: Jonathan Cameron --- include/linux/mfd/cros_ec_commands.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 1683003603f3..098c3501ad2c 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1441,7 +1441,8 @@ enum motionsensor_type { MOTIONSENSE_TYPE_PROX = 3, MOTIONSENSE_TYPE_LIGHT = 4, MOTIONSENSE_TYPE_ACTIVITY = 5, - MOTIONSENSE_TYPE_MAX + MOTIONSENSE_TYPE_BARO = 6, + MOTIONSENSE_TYPE_MAX, }; /* List of motion sensor locations. */ -- cgit v1.2.3 From cefae80249f6bf3fdbc7c3a00bc43deb4c1bccf0 Mon Sep 17 00:00:00 2001 From: Luis Oliveira Date: Thu, 26 Jan 2017 17:45:32 +0000 Subject: i2c: core: helper function to detect slave mode This function has the purpose of mode detection by checking the device nodes for a reg matching with the I2C_OWN_SLAVE_ADDREESS flag. Currently only checks using OF functions (ACPI slave not supported yet). Signed-off-by: Luis Oliveira Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4b45ec46161f..f0ba4bac7452 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -282,6 +282,7 @@ enum i2c_slave_event { extern int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb); extern int i2c_slave_unregister(struct i2c_client *client); +extern bool i2c_detect_slave_mode(struct device *dev); static inline int i2c_slave_event(struct i2c_client *client, enum i2c_slave_event event, u8 *val) -- cgit v1.2.3 From 0a595ebaaa6b53a2226d3fee2a2fd616ea5ba378 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Dec 2016 10:12:56 -0800 Subject: f2fs: support IO alignment for DATA and NODE writes This patch implements IO alignment by filling dummy blocks in DATA and NODE write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs, we can eliminate underlying dummy page problem which FTL conducts in order to close MLC or TLC partial written pages. Note that, - it requires "-o mode=lfs". - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256. - read IO is still 4KB. - do checkpoint at fsync, if dummy NODE page was written. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cea41a124a80..f0748524ca8c 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,12 @@ #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) + /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) #define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) -- cgit v1.2.3 From 0cb8eb30d425d2d2ae28ab630596c44a158784c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 29 Jan 2017 14:42:52 +0100 Subject: leds: class: Add new optional brightness_hw_changed attribute Some LEDs may have their brightness level changed autonomously (outside of kernel control) by hardware / firmware. This commit adds support for an optional brightness_hw_changed attribute to signal such changes to userspace (if a driver can detect them): What: /sys/class/leds//brightness_hw_changed Date: January 2017 KernelVersion: 4.11 Description: Last hardware set brightness level for this LED. Some LEDs may be changed autonomously by hardware/firmware. Only LEDs where this happens and the driver can detect this, will have this file. This file supports poll() to detect when the hardware changes the brightness. Reading this file will return the last brightness level set by the hardware, this may be different from the current brightness. Drivers which want to support this, simply add LED_BRIGHT_HW_CHANGED to their flags field and call led_classdev_notify_brightness_hw_changed() with the hardware set brightness when they detect a hardware / firmware triggered brightness change. Signed-off-by: Hans de Goede Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index bb50d0151e75..38c0bd7ca107 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,6 +13,7 @@ #define __LINUX_LEDS_H_INCLUDED #include +#include #include #include #include @@ -47,6 +48,7 @@ struct led_classdev { #define LED_DEV_CAP_FLASH (1 << 18) #define LED_HW_PLUGGABLE (1 << 19) #define LED_PANIC_INDICATOR (1 << 20) +#define LED_BRIGHT_HW_CHANGED (1 << 21) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; @@ -111,6 +113,11 @@ struct led_classdev { bool activated; #endif +#ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED + int brightness_hw_changed; + struct kernfs_node *brightness_hw_changed_kn; +#endif + /* Ensures consistent access to the LED Flash Class device */ struct mutex led_access; }; @@ -423,4 +430,12 @@ static inline void ledtrig_cpu(enum cpu_led_event evt) } #endif +#ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED +extern void led_classdev_notify_brightness_hw_changed( + struct led_classdev *led_cdev, enum led_brightness brightness); +#else +static inline void led_classdev_notify_brightness_hw_changed( + struct led_classdev *led_cdev, enum led_brightness brightness) { } +#endif + #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From f1712c73714088a7252d276a57126d56c7d37e64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Jan 2017 08:11:44 -0800 Subject: can: Fix kernel panic at security_sock_rcv_skb Zhang Yanmin reported crashes [1] and provided a patch adding a synchronize_rcu() call in can_rx_unregister() The main problem seems that the sockets themselves are not RCU protected. If CAN uses RCU for delivery, then sockets should be freed only after one RCU grace period. Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's ease stable backports with the following fix instead. [1] BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] selinux_socket_sock_rcv_skb+0x65/0x2a0 Call Trace: [] security_sock_rcv_skb+0x4c/0x60 [] sk_filter+0x41/0x210 [] sock_queue_rcv_skb+0x53/0x3a0 [] raw_rcv+0x2a3/0x3c0 [] can_rcv_filter+0x12b/0x370 [] can_receive+0xd9/0x120 [] can_rcv+0xab/0x100 [] __netif_receive_skb_core+0xd8c/0x11f0 [] __netif_receive_skb+0x24/0xb0 [] process_backlog+0x127/0x280 [] net_rx_action+0x33b/0x4f0 [] __do_softirq+0x184/0x440 [] do_softirq_own_stack+0x1c/0x30 [] do_softirq.part.18+0x3b/0x40 [] do_softirq+0x1d/0x20 [] netif_rx_ni+0xe5/0x110 [] slcan_receive_buf+0x507/0x520 [] flush_to_ldisc+0x21c/0x230 [] process_one_work+0x24f/0x670 [] worker_thread+0x9d/0x6f0 [] ? rescuer_thread+0x480/0x480 [] kthread+0x12c/0x150 [] ret_from_fork+0x3f/0x70 Reported-by: Zhang Yanmin Signed-off-by: Eric Dumazet Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/can/core.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index a0875001b13c..df08a41d5be5 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -45,10 +45,9 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -extern int can_rx_register(struct net_device *dev, canid_t can_id, - canid_t mask, - void (*func)(struct sk_buff *, void *), - void *data, char *ident); +int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, + void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk); extern void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, -- cgit v1.2.3 From 40be0dda0725886b623d67868db3219a2e74683b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 28 Jan 2017 15:15:42 +0100 Subject: net: add devm version of alloc_etherdev_mqs function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds devm_alloc_etherdev_mqs function and devm_alloc_etherdev macro. These can be used for simpler netdev allocation without having to care about calling free_netdev. Thanks to this change drivers, their error paths and removal paths may get simpler by a bit. Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 42add77ae47d..c62b709b1ce0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -54,6 +54,11 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) +struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, + unsigned int txqs, + unsigned int rxqs); +#define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1) + struct sk_buff **eth_gro_receive(struct sk_buff **head, struct sk_buff *skb); int eth_gro_complete(struct sk_buff *skb, int nhoff); -- cgit v1.2.3 From f067a982cefa8df5642212bb0c7e25974831f781 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 23 Jan 2017 10:11:42 +0530 Subject: PM / OPP: Add 'struct kref' to OPP table Add kref to struct opp_table for easier accounting of the OPP table. Note that the new routine dev_pm_opp_get_opp_table() takes the reference from under the opp_table_lock, which guarantees that the OPP table doesn't get freed unless dev_pm_opp_put_opp_table() is called for the OPP table. Two separate release mechanisms are added: locked and unlocked. In unlocked version the routines aren't required to take/drop opp_table_lock as the callers have already done that. This is required to avoid breaking git bisect, otherwise we may get lockdeps between commits. Once all the users of OPP table are updated the unlocked version shall be removed. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 66a02deeb03f..d867c6b25f9a 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -78,6 +78,9 @@ struct dev_pm_set_opp_data { #if defined(CONFIG_PM_OPP) +struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); +void dev_pm_opp_put_opp_table(struct opp_table *opp_table); + unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); @@ -126,6 +129,13 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) void dev_pm_opp_remove_table(struct device *dev); void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask); #else +static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} + static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { return 0; -- cgit v1.2.3 From fa30184d192ec78d443cf6d3abc37d9eb3b9253e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 23 Jan 2017 10:11:43 +0530 Subject: PM / OPP: Return opp_table from dev_pm_opp_set_*() routines Now that we have proper kernel reference infrastructure in place for OPP tables, use it to guarantee that the OPP table isn't freed while being used by the callers of dev_pm_opp_set_*() APIs. Make them all return the pointer to the OPP table after taking its reference and put the reference back with dev_pm_opp_put_*() APIs. Now that the OPP table wouldn't get freed while these routines are executing after dev_pm_opp_get_opp_table() is called, there is no need to take opp_table_lock. Drop them as well. Remove the rcu specific comments from these routines as they aren't relevant anymore. Note that prototypes of dev_pm_opp_{set|put}_regulators() were already updated by another patch. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index d867c6b25f9a..99787cbcaab2 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -114,15 +114,14 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq); int dev_pm_opp_register_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_opp_unregister_notifier(struct device *dev, struct notifier_block *nb); -int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, - unsigned int count); -void dev_pm_opp_put_supported_hw(struct device *dev); -int dev_pm_opp_set_prop_name(struct device *dev, const char *name); -void dev_pm_opp_put_prop_name(struct device *dev); +struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, unsigned int count); +void dev_pm_opp_put_supported_hw(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name); +void dev_pm_opp_put_prop_name(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count); void dev_pm_opp_put_regulators(struct opp_table *opp_table); -int dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -void dev_pm_opp_register_put_opp_helper(struct device *dev); +struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); +void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -224,29 +223,29 @@ static inline int dev_pm_opp_unregister_notifier(struct device *dev, struct noti return -ENOTSUPP; } -static inline int dev_pm_opp_set_supported_hw(struct device *dev, - const u32 *versions, - unsigned int count) +static inline struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) { - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} +static inline void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) {} -static inline int dev_pm_opp_register_set_opp_helper(struct device *dev, +static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_register_put_opp_helper(struct device *dev) {} +static inline void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) {} -static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { - return -ENOTSUPP; + return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_put_prop_name(struct device *dev) {} +static inline void dev_pm_opp_put_prop_name(struct opp_table *opp_table) {} static inline struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count) { -- cgit v1.2.3 From 7034764a1e4a6edbb60914e89aad8384e3fe5d17 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 23 Jan 2017 10:11:46 +0530 Subject: PM / OPP: Add 'struct kref' to struct dev_pm_opp Add kref to struct dev_pm_opp for easier accounting of the OPPs. Note that the OPPs are freed under the opp_table->lock mutex only. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 99787cbcaab2..731d548657aa 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -102,6 +102,7 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, unsigned long *freq); +void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt); @@ -193,6 +194,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev, return ERR_PTR(-ENOTSUPP); } +static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} + static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { -- cgit v1.2.3 From b526a314263ea217b8fa9758dca5dc245fd49997 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Jan 2017 17:14:08 +0100 Subject: pwm: Try to load modules during pwm_get() Add a module name string to the pwm_lookup struct and if specified try to load the module using request_module() if pwmchip_find_by_name() is unable to find the PWM chip. This is a last resort to work around drivers that can't - and can't be made to - deal with deferred probe. Signed-off-by: Hans de Goede [thierry.reding@gmail.com: rename new macro, reword commit message] [thierry.reding@gmail.com: add comment explaining use-case] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index eae215ef1b2c..08fad7c6a471 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -603,18 +603,25 @@ struct pwm_lookup { const char *con_id; unsigned int period; enum pwm_polarity polarity; + const char *module; /* optional, may be NULL */ }; -#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \ - { \ - .provider = _provider, \ - .index = _index, \ - .dev_id = _dev_id, \ - .con_id = _con_id, \ - .period = _period, \ - .polarity = _polarity \ +#define PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, \ + _period, _polarity, _module) \ + { \ + .provider = _provider, \ + .index = _index, \ + .dev_id = _dev_id, \ + .con_id = _con_id, \ + .period = _period, \ + .polarity = _polarity, \ + .module = _module, \ } +#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \ + PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \ + _polarity, NULL) + #if IS_ENABLED(CONFIG_PWM) void pwm_add_table(struct pwm_lookup *table, size_t num); void pwm_remove_table(struct pwm_lookup *table, size_t num); -- cgit v1.2.3 From 61babe943938bb41fcd7e2b1dec7d1537ea3892f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Nov 2016 18:32:08 -0800 Subject: mtd: nand: fix nand.h kernel-doc warnings Fix kernel-doc warnings in : ..//include/linux/mtd/nand.h:658: warning: No description found for parameter 'tCEH_min' ..//include/linux/mtd/nand.h:877: warning: No description found for parameter 'data_interface' Fixes: eee64b700e26 ("mtd: nand: Introduce nand_data_interface") Signed-off-by: Randy Dunlap Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c5f3a012ae62..f67915c7726f 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -615,7 +615,7 @@ struct nand_buffers { * @tALS_min: ALE setup time * @tAR_min: ALE to RE# delay * @tCEA_max: CE# access time - * @tCEH_min: + * @tCEH_min: CE# high hold time * @tCH_min: CE# hold time * @tCHZ_max: CE# high to output hi-Z * @tCLH_min: CLE hold time @@ -801,6 +801,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * supported, 0 otherwise. * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is * supported, 0 otherwise. + * @data_interface: [INTERN] NAND interface timing information * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand -- cgit v1.2.3 From 4404d7d821c33ac8105f1d52deb60f736d7c6a06 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 18 Dec 2016 12:34:55 +0100 Subject: mtd: nand: fsmc: remove stale non-DT probe path The FSMC driver has an execution path and a header file in that serves to support passing in platform data through board files, albeit no upstream users of this mechanism exist. The header file also contains function headers for functions that do not exist in the kernel. Delete this and move the platform data struct, parsing and handling into the driver, assume we are using OF and make the driver depend on OF, remove the ifdefs making that optional. Cc: Viresh Kumar Cc: Stefan Roese Cc: Vipin Kumar Signed-off-by: Linus Walleij Reviewed-by: Stefan Roese Acked-by: Viresh Kumar Signed-off-by: Boris Brezillon --- include/linux/mtd/fsmc.h | 156 ----------------------------------------------- 1 file changed, 156 deletions(-) delete mode 100644 include/linux/mtd/fsmc.h (limited to 'include/linux') diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h deleted file mode 100644 index ad3c3488073c..000000000000 --- a/include/linux/mtd/fsmc.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * incude/mtd/fsmc.h - * - * ST Microelectronics - * Flexible Static Memory Controller (FSMC) - * platform data interface and header file - * - * Copyright © 2010 ST Microelectronics - * Vipin Kumar - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __MTD_FSMC_H -#define __MTD_FSMC_H - -#include -#include -#include -#include -#include -#include - -#define FSMC_NAND_BW8 1 -#define FSMC_NAND_BW16 2 - -#define FSMC_MAX_NOR_BANKS 4 -#define FSMC_MAX_NAND_BANKS 4 - -#define FSMC_FLASH_WIDTH8 1 -#define FSMC_FLASH_WIDTH16 2 - -/* fsmc controller registers for NOR flash */ -#define CTRL 0x0 - /* ctrl register definitions */ - #define BANK_ENABLE (1 << 0) - #define MUXED (1 << 1) - #define NOR_DEV (2 << 2) - #define WIDTH_8 (0 << 4) - #define WIDTH_16 (1 << 4) - #define RSTPWRDWN (1 << 6) - #define WPROT (1 << 7) - #define WRT_ENABLE (1 << 12) - #define WAIT_ENB (1 << 13) - -#define CTRL_TIM 0x4 - /* ctrl_tim register definitions */ - -#define FSMC_NOR_BANK_SZ 0x8 -#define FSMC_NOR_REG_SIZE 0x40 - -#define FSMC_NOR_REG(base, bank, reg) (base + \ - FSMC_NOR_BANK_SZ * (bank) + \ - reg) - -/* fsmc controller registers for NAND flash */ -#define PC 0x00 - /* pc register definitions */ - #define FSMC_RESET (1 << 0) - #define FSMC_WAITON (1 << 1) - #define FSMC_ENABLE (1 << 2) - #define FSMC_DEVTYPE_NAND (1 << 3) - #define FSMC_DEVWID_8 (0 << 4) - #define FSMC_DEVWID_16 (1 << 4) - #define FSMC_ECCEN (1 << 6) - #define FSMC_ECCPLEN_512 (0 << 7) - #define FSMC_ECCPLEN_256 (1 << 7) - #define FSMC_TCLR_1 (1) - #define FSMC_TCLR_SHIFT (9) - #define FSMC_TCLR_MASK (0xF) - #define FSMC_TAR_1 (1) - #define FSMC_TAR_SHIFT (13) - #define FSMC_TAR_MASK (0xF) -#define STS 0x04 - /* sts register definitions */ - #define FSMC_CODE_RDY (1 << 15) -#define COMM 0x08 - /* comm register definitions */ - #define FSMC_TSET_0 0 - #define FSMC_TSET_SHIFT 0 - #define FSMC_TSET_MASK 0xFF - #define FSMC_TWAIT_6 6 - #define FSMC_TWAIT_SHIFT 8 - #define FSMC_TWAIT_MASK 0xFF - #define FSMC_THOLD_4 4 - #define FSMC_THOLD_SHIFT 16 - #define FSMC_THOLD_MASK 0xFF - #define FSMC_THIZ_1 1 - #define FSMC_THIZ_SHIFT 24 - #define FSMC_THIZ_MASK 0xFF -#define ATTRIB 0x0C -#define IOATA 0x10 -#define ECC1 0x14 -#define ECC2 0x18 -#define ECC3 0x1C -#define FSMC_NAND_BANK_SZ 0x20 - -#define FSMC_NAND_REG(base, bank, reg) (base + FSMC_NOR_REG_SIZE + \ - (FSMC_NAND_BANK_SZ * (bank)) + \ - reg) - -#define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) - -struct fsmc_nand_timings { - uint8_t tclr; - uint8_t tar; - uint8_t thiz; - uint8_t thold; - uint8_t twait; - uint8_t tset; -}; - -enum access_mode { - USE_DMA_ACCESS = 1, - USE_WORD_ACCESS, -}; - -/** - * fsmc_nand_platform_data - platform specific NAND controller config - * @nand_timings: timing setup for the physical NAND interface - * @partitions: partition table for the platform, use a default fallback - * if this is NULL - * @nr_partitions: the number of partitions in the previous entry - * @options: different options for the driver - * @width: bus width - * @bank: default bank - * @select_bank: callback to select a certain bank, this is - * platform-specific. If the controller only supports one bank - * this may be set to NULL - */ -struct fsmc_nand_platform_data { - struct fsmc_nand_timings *nand_timings; - struct mtd_partition *partitions; - unsigned int nr_partitions; - unsigned int options; - unsigned int width; - unsigned int bank; - - enum access_mode mode; - - void (*select_bank)(uint32_t bank, uint32_t busw); - - /* priv structures for dma accesses */ - void *read_dma_priv; - void *write_dma_priv; -}; - -extern int __init fsmc_nor_init(struct platform_device *pdev, - unsigned long base, uint32_t bank, uint32_t width); -extern void __init fsmc_init_board_info(struct platform_device *pdev, - struct mtd_partition *partitions, unsigned int nr_partitions, - unsigned int width); - -#endif /* __MTD_FSMC_H */ -- cgit v1.2.3 From 058fe1c0440e68a1ba3c2270ae43e9f0298b27d8 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 18 Jan 2017 11:24:53 -0800 Subject: perf/core: Make cgroup switch visit only cpuctxs with cgroup events This patch follows from a conversation in CQM/CMT's last series about speeding up the context switch for cgroup events: https://patchwork.kernel.org/patch/9478617/ This is a low-hanging fruit optimization. It replaces the iteration over the "pmus" list in cgroup switch by an iteration over a new list that contains only cpuctxs with at least one cgroup event. This is necessary because the number of PMUs have increased over the years e.g modern x86 server systems have well above 50 PMUs. The iteration over the full PMU list is unneccessary and can be costly in heavy cache contention scenarios. Below are some instrumentation measurements with 10, 50 and 90 percentiles of the total cost of context switch before and after this optimization for a simple array read/write microbenchark. Contention Level Nr events Before (us) After (us) Median L2 L3 types (10%, 50%, 90%) (10%, 50%, 90% Speedup -------------------------------------------------------------------------- Low Low 1 (1.72, 2.42, 5.85) (1.35, 1.64, 5.46) 29% High Low 1 (2.08, 4.56, 19.8) (1720, 2.20, 13.7) 51% High High 1 (2.86, 10.4, 12.7) (2.54, 4.32, 12.1) 58% Low Low 2 (1.98, 3.20, 6.89) (1.68, 2.41, 8.89) 24% High Low 2 (2.48, 5.28, 22.4) (2150, 3.69, 14.6) 30% High High 2 (3.32, 8.09, 13.9) (2.80, 5.15, 13.7) 36% where: 1 event type = cycles 2 event types = cycles,intel_cqm/llc_occupancy/ Contention L2 Low: workset < L2 cache size. High: " >> L2 " " . Contention L3 Low: workset of task on all sockets < L3 cache size. High: " " " " " " >> L3 " " . Median Speedup is (50%ile Before - 50%ile After) / 50%ile Before Unsurprisingly, the benefits of this optimization decrease with the number of cpuctxs with a cgroup events, yet, is never detrimental. Tested-by: Mark Rutland Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vikas Shivappa Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170118192454.58008-2-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 78ed8105e64d..dfa725723f28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -788,6 +788,7 @@ struct perf_cpu_context { struct pmu *unique_pmu; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; + struct list_head cgrp_cpuctx_entry; #endif struct list_head sched_cb_entry; -- cgit v1.2.3 From 1fd7e416995401ec082fc0fe6090a223969beda5 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Wed, 18 Jan 2017 11:24:54 -0800 Subject: perf/core: Remove perf_cpu_context::unique_pmu cpuctx->unique_pmu was originally introduced as a way to identify cpuctxs with shared pmus in order to avoid visiting the same cpuctx more than once in a for_each_pmu loop. cpuctx->unique_pmu == cpuctx->pmu in non-software task contexts since they have only one pmu per cpuctx. Since perf_pmu_sched_task() is only called in hw contexts, this patch replaces cpuctx->unique_pmu by cpuctx->pmu in it. The change above, together with the previous patch in this series, removed the remaining uses of cpuctx->unique_pmu, so we remove it altogether. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Paul Turner Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vikas Shivappa Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170118192454.58008-3-davidcc@google.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index dfa725723f28..5c58e93c130c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -785,7 +785,6 @@ struct perf_cpu_context { ktime_t hrtimer_interval; unsigned int hrtimer_active; - struct pmu *unique_pmu; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; struct list_head cgrp_cpuctx_entry; -- cgit v1.2.3 From 5c34153704898ed7ec0f8c0dceb651cbe4b713fd Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:49 +0530 Subject: irqchip/gic-v3: Add missing system register definitions Define register definitions for ICH_VMCR_EL2, ICC_CTLR_EL1 and ICH_VTR_EL2, ICC_BPR0_EL1, ICC_BPR1_EL1 registers. Signed-off-by: Vijaya Kumar K Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 43 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index e808f8ae6f14..7f6d904a62c6 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -352,8 +352,30 @@ /* * CPU interface registers */ -#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) -#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) #define ICC_SRE_EL1_SRE (1U << 0) /* @@ -384,12 +406,29 @@ #define ICH_VMCR_CTLR_SHIFT 0 #define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) #define ICH_VMCR_BPR1_SHIFT 18 #define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) #define ICH_VMCR_BPR0_SHIFT 21 #define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) #define ICC_IAR1_EL1_SPURIOUS 0x3ff -- cgit v1.2.3 From 5fb247d79c04240dce86c842976cde1edde7f7ed Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Thu, 26 Jan 2017 19:50:50 +0530 Subject: KVM: arm/arm64: vgic: Introduce VENG0 and VENG1 fields to vmcr struct ICC_VMCR_EL2 supports virtual access to ICC_IGRPEN1_EL1.Enable and ICC_IGRPEN0_EL1.Enable fields. Add grpen0 and grpen1 member variables to struct vmcr to support read and write of these fields. Also refactor vgic_set_vmcr and vgic_get_vmcr() code. Drop ICH_VMCR_CTLR_SHIFT and ICH_VMCR_CTLR_MASK macros and instead use ICH_VMCR_EOI* and ICH_VMCR_CBPR* macros. Signed-off-by: Vijaya Kumar K Reviewed-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 7f6d904a62c6..170e00a40826 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -404,8 +404,6 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) -#define ICH_VMCR_CTLR_SHIFT 0 -#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) #define ICH_VMCR_CBPR_SHIFT 4 #define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) #define ICH_VMCR_EOIM_SHIFT 9 -- cgit v1.2.3 From 9d93dc1c96ec446bef9c34a189ea24556f1af89a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 25 Jan 2017 13:47:43 +0000 Subject: arm/arm64: KVM: Get rid of KVM_MEMSLOT_INCOHERENT KVM_MEMSLOT_INCOHERENT is not used anymore, as we've killed its only use in the arm/arm64 MMU code. Let's remove the last artifacts. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1c5190dab2c1..cda457bcedc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -45,7 +45,6 @@ * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) -#define KVM_MEMSLOT_INCOHERENT (1UL << 17) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 -- cgit v1.2.3 From 08d85f3ea99f1eeafc4e8507936190e86a16ee8c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 17 Jan 2017 16:00:48 +0000 Subject: irqdomain: Avoid activating interrupts more than once Since commit f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early"), we can end-up activating a PCI/MSI twice (once at allocation time, and once at startup time). This is normally of no consequences, except that there is some HW out there that may misbehave if activate is used more than once (the GICv3 ITS, for example, uses the activate callback to issue the MAPVI command, and the architecture spec says that "If there is an existing mapping for the EventID-DeviceID combination, behavior is UNPREDICTABLE"). While this could be worked around in each individual driver, it may make more sense to tackle the issue at the core level. In order to avoid getting in that situation, let's have a per-interrupt flag to remember if we have already activated that interrupt or not. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Reported-and-tested-by: Andre Przywara Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1484668848-24361-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index e79875574b39..39e3254e5769 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -184,6 +184,7 @@ struct irq_data { * * IRQD_TRIGGER_MASK - Mask for the trigger type bits * IRQD_SETAFFINITY_PENDING - Affinity setting is pending + * IRQD_ACTIVATED - Interrupt has already been activated * IRQD_NO_BALANCING - Balancing disabled for this IRQ * IRQD_PER_CPU - Interrupt is per cpu * IRQD_AFFINITY_SET - Interrupt affinity was set @@ -202,6 +203,7 @@ struct irq_data { enum { IRQD_TRIGGER_MASK = 0xf, IRQD_SETAFFINITY_PENDING = (1 << 8), + IRQD_ACTIVATED = (1 << 9), IRQD_NO_BALANCING = (1 << 10), IRQD_PER_CPU = (1 << 11), IRQD_AFFINITY_SET = (1 << 12), @@ -312,6 +314,21 @@ static inline bool irqd_affinity_is_managed(struct irq_data *d) return __irqd_to_state(d) & IRQD_AFFINITY_MANAGED; } +static inline bool irqd_is_activated(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_ACTIVATED; +} + +static inline void irqd_set_activated(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_ACTIVATED; +} + +static inline void irqd_clr_activated(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_ACTIVATED; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -- cgit v1.2.3 From a92def1becf33e91fc460c7ae575aa9210ba8f40 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 19 Dec 2016 18:48:29 -0200 Subject: [media] ir-rx51: port to rc-core This driver was written using lirc since rc-core did not support transmitter-only hardware at that time. Now that it does, port this driver. Compile tested only. Signed-off-by: Sean Young Cc: Timo Kokkonen Cc: Ivaylo Dimitrov Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/ir-rx51.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/media/ir-rx51.h b/include/linux/platform_data/media/ir-rx51.h index 812d87307877..2c94ab568bfa 100644 --- a/include/linux/platform_data/media/ir-rx51.h +++ b/include/linux/platform_data/media/ir-rx51.h @@ -1,7 +1,7 @@ -#ifndef _LIRC_RX51_H -#define _LIRC_RX51_H +#ifndef _IR_RX51_H +#define _IR_RX51_H -struct lirc_rx51_platform_data { +struct ir_rx51_platform_data { int(*set_max_mpu_wakeup_lat)(struct device *dev, long t); }; -- cgit v1.2.3 From ddeaa6379d50a530f9f57b3d12f7940e079b052c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 15 Oct 2016 13:59:03 -0700 Subject: sunrpc & nfs: Add and use dprintk_cont macros Allow line continuations to work properly with KERN_CONT. Signed-off-by: Joe Perches [Anna: Add fallback dprintk_cont() for when CONFIG_SUNRPC_DEBUG=n] Signed-off-by: Anna Schumaker --- include/linux/sunrpc/debug.h | 58 ++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 59a7889e15db..8da0f37f3bdc 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -20,33 +20,55 @@ extern unsigned int nfsd_debug; extern unsigned int nlm_debug; #endif -#define dprintk(args...) dfprintk(FACILITY, ## args) -#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) +#define dprintk(fmt, ...) \ + dfprintk(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_cont(fmt, ...) \ + dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_rcu(fmt, ...) \ + dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_rcu_cont(fmt, ...) \ + dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) -# define dfprintk(fac, args...) \ - do { \ - ifdebug(fac) \ - printk(KERN_DEFAULT args); \ - } while (0) - -# define dfprintk_rcu(fac, args...) \ - do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_DEFAULT args); \ - rcu_read_unlock(); \ - } \ - } while (0) +# define dfprintk(fac, fmt, ...) \ +do { \ + ifdebug(fac) \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ +} while (0) + +# define dfprintk_cont(fac, fmt, ...) \ +do { \ + ifdebug(fac) \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ +} while (0) + +# define dfprintk_rcu(fac, fmt, ...) \ +do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + } \ +} while (0) + +# define dfprintk_rcu_cont(fac, fmt, ...) \ +do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + } \ +} while (0) # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) -# define dfprintk(fac, args...) do {} while (0) -# define dfprintk_rcu(fac, args...) do {} while (0) +# define dfprintk(fac, fmt, ...) do {} while (0) +# define dfprintk_cont(fac, fmt, ...) do {} while (0) +# define dfprintk_rcu(fac, fmt, ...) do {} while (0) # define RPC_IFDEBUG(x) #endif -- cgit v1.2.3 From 297e1cf29eac13d3e5bb896d18c64a50b6bb48eb Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Sun, 29 Jan 2017 18:56:17 +0200 Subject: net/mlx4_en: Adding support of turning off link autonegotiation via ethtool This feature will allow the user to disable auto negotiation on the port for mlx4 devices while setting the speed is limited to 1GbE speeds. Other speeds will not be accepted in autoneg off mode. This functionality is permitted providing that the firmware is compatible with this feature. The above is determined by querying a new dedicated capability bit in the device. Signed-off-by: Ariel Levkovich Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6533c16e27ad..c3ac945b2759 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1539,8 +1539,13 @@ enum mlx4_ptys_proto { MLX4_PTYS_EN = 1<<2, }; +enum mlx4_ptys_flags { + MLX4_PTYS_AN_DISABLE_CAP = 1 << 5, + MLX4_PTYS_AN_DISABLE_ADMIN = 1 << 6, +}; + struct mlx4_ptys_reg { - u8 resrvd1; + u8 flags; u8 local_port; u8 resrvd2; u8 proto_mask; -- cgit v1.2.3 From 40fb4fc1e18bc641a0d0887ac21943fd194c1fa9 Mon Sep 17 00:00:00 2001 From: Shaker Daibes Date: Sun, 29 Jan 2017 18:56:18 +0200 Subject: net/mlx4_en: Pass user MTU value to Firmware at set port command When starting the port, driver will inform Firmware about the actual MTU which does not include implicit headers, such as FCS or VLAN tags. Signed-off-by: Shaker Daibes Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c3ac945b2759..7e66e4f62858 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1374,6 +1374,7 @@ int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); +int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); -- cgit v1.2.3 From 9e9fc6f00a54f7064dc681ac187be6498d566a4f Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 25 Jan 2017 14:06:27 +0530 Subject: cpuidle:powernv: Add helper function to populate powernv idle states. In the current code for powernv_add_idle_states, there is a lot of code duplication while initializing an idle state in powernv_states table. Add an inline helper function to populate the powernv_states[] table for a given idle state. Invoke this for populating the "Nap", "Fastsleep" and the stop states in powernv_add_idle_states. Signed-off-by: Gautham R. Shenoy Acked-by: Balbir Singh Acked-by: Rafael J. Wysocki Signed-off-by: Michael Ellerman --- include/linux/cpuidle.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index da346f2817a8..fc1e5d7fc1c7 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -62,6 +62,7 @@ struct cpuidle_state { }; /* Idle State Flags */ +#define CPUIDLE_FLAG_NONE (0x00) #define CPUIDLE_FLAG_COUPLED (0x02) /* state applies to multiple cpus */ #define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */ -- cgit v1.2.3 From 608d792192d7136d354bc1b44585c441164dc54e Mon Sep 17 00:00:00 2001 From: Sarangdhar Joshi Date: Mon, 23 Jan 2017 17:53:18 -0800 Subject: remoteproc: Add RPROC_DELETED state Add new state RPROC_DELETED to handle synchronization between rproc_del() and other operations on rproc. This state represents the rproc device that has been "deleted". CC: Loic Pallardy CC: Bjorn Andersson Signed-off-by: Sarangdhar Joshi Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 8265d351c9f0..e691f64fc7be 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -346,6 +346,7 @@ struct rproc_ops { * a message. * @RPROC_RUNNING: device is up and running * @RPROC_CRASHED: device has crashed; need to start recovery + * @RPROC_DELETED: device is deleted * @RPROC_LAST: just keep this one at the end * * Please note that the values of these states are used as indices @@ -359,7 +360,8 @@ enum rproc_state { RPROC_SUSPENDED = 1, RPROC_RUNNING = 2, RPROC_CRASHED = 3, - RPROC_LAST = 4, + RPROC_DELETED = 4, + RPROC_LAST = 5, }; /** -- cgit v1.2.3 From 2099c77d4af7d1582db6d4437014cf18fe62e74b Mon Sep 17 00:00:00 2001 From: Sarangdhar Joshi Date: Mon, 23 Jan 2017 17:53:19 -0800 Subject: remoteproc: Drop firmware_loading_complete firmware_loading_complete is used to synchronize operations on rproc while asynchronous firmware loading is in progress. However, rproc_boot() no longer waits on firmware_loading_complete. Hence drop this completion variable altogether and handle the race between rproc_del() and rproc_boot() using new state RPROC_DELETED. The request_firmware_nowait() will hold the reference to rproc device by using a get_device()/put_device(), so the rproc struct will remain valid even when we return from rproc_del() before the asynchronous call to rproc_fw_config_virtio() completes. CC: Loic Pallardy CC: Bjorn Andersson Signed-off-by: Sarangdhar Joshi Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e691f64fc7be..81da49564ff4 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -399,7 +399,6 @@ enum rproc_crash_type { * @num_traces: number of trace buffers * @carveouts: list of physically contiguous memory allocations * @mappings: list of iommu mappings we initiated, needed on shutdown - * @firmware_loading_complete: marks e/o asynchronous firmware loading * @bootaddr: address of first instruction to boot rproc with (optional) * @rvdevs: list of remote virtio devices * @subdevs: list of subdevices, to following the running state @@ -431,7 +430,6 @@ struct rproc { int num_traces; struct list_head carveouts; struct list_head mappings; - struct completion firmware_loading_complete; u32 bootaddr; struct list_head rvdevs; struct list_head subdevs; -- cgit v1.2.3 From 8ff6daa17b6a64e59bbabaa116b9bd854fa4da1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 23:20:26 -0800 Subject: iomap: constify struct iomap_ops Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/dax.h | 8 ++++---- include/linux/iomap.h | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 24ad71173995..2983e52efd07 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -37,9 +37,9 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) } ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, - struct iomap_ops *ops); + const struct iomap_ops *ops); int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); + const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, @@ -72,7 +72,7 @@ static inline unsigned int dax_radix_order(void *entry) return 0; } int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); + pmd_t *pmd, unsigned int flags, const struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { @@ -80,7 +80,7 @@ static inline unsigned int dax_radix_order(void *entry) } static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags, - struct iomap_ops *ops) + const struct iomap_ops *ops) { return VM_FAULT_FALLBACK; } diff --git a/include/linux/iomap.h b/include/linux/iomap.h index a4c94b86401e..891459caa278 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -72,17 +72,17 @@ struct iomap_ops { }; ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, struct iomap_ops *ops); + bool *did_zero, const struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); + const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - loff_t start, loff_t len, struct iomap_ops *ops); + loff_t start, loff_t len, const struct iomap_ops *ops); /* * Flags for direct I/O ->end_io: @@ -92,6 +92,6 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, unsigned flags); ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - struct iomap_ops *ops, iomap_dio_end_io_t end_io); + const struct iomap_ops *ops, iomap_dio_end_io_t end_io); #endif /* LINUX_IOMAP_H */ -- cgit v1.2.3 From 77d65d6f3d60cebb2dc24cf05408255a21bb6409 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 27 Jan 2017 17:42:01 +0100 Subject: dmaengine: Provide a wrapper for memcpy operations Almost all ->device_prep_dma_xx() methods have a wrapper defined in dmaengine.h. Add one for ->device_prep_dma_memcpy(). Signed-off-by: Boris Brezillon Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index feee6ec6a13b..533680860865 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -894,6 +894,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset( len, flags); } +static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy) + return NULL; + + return chan->device->device_prep_dma_memcpy(chan, dest, src, + len, flags); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg( struct dma_chan *chan, struct scatterlist *dst_sg, unsigned int dst_nents, -- cgit v1.2.3 From bcf23c79c4e46130701370af4383b61a3cba755c Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 31 Jan 2017 15:38:16 +0900 Subject: PM / devfreq: Fix available_governor sysfs The devfreq using passive governor is not able to change the governor. So, the user can not change the governor through 'available_governor' sysfs entry. Also, the devfreq which don't use the passive governor is not able to change to 'passive' governor on the fly. Fixes: 996133119f57 ("PM / devfreq: Add new passive governor") Cc: stable@vger.kernel.org Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 2de4e2eea180..e0acb0e5243b 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -104,6 +104,8 @@ struct devfreq_dev_profile { * struct devfreq_governor - Devfreq policy governor * @node: list node - contains registered devfreq governors * @name: Governor's name + * @immutable: Immutable flag for governor. If the value is 1, + * this govenror is never changeable to other governor. * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run * devfreq_dev_profile.get_dev_status() to get the @@ -121,6 +123,7 @@ struct devfreq_governor { struct list_head node; const char name[DEVFREQ_NAME_LEN]; + const unsigned int immutable; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, unsigned int event, void *data); -- cgit v1.2.3 From 433e19cf33d34bb6751c874a9c00980552fe508c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 28 Jan 2017 11:46:02 -0700 Subject: Drivers: hv: vmbus: finally fix hv_need_to_signal_on_read() Commit a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in hv_need_to_signal_on_read()") added the proper mb(), but removed the test "prev_write_sz < pending_sz" when making the signal decision. As a result, the guest can signal the host unnecessarily, and then the host can throttle the guest because the host thinks the guest is buggy or malicious; finally the user running stress test can perceive intermittent freeze of the guest. This patch brings back the test, and properly handles the in-place consumption APIs used by NetVSC (see get_next_pkt_raw(), put_pkt_raw() and commit_rd_index()). Fixes: a389fcfd2cb5 ("Drivers: hv: vmbus: Fix signaling logic in hv_need_to_signal_on_read()") Signed-off-by: Dexuan Cui Reported-by: Rolf Neugebauer Tested-by: Rolf Neugebauer Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 42fe43fb0c80..183efde54269 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -128,6 +128,7 @@ struct hv_ring_buffer_info { u32 ring_data_startoffset; u32 priv_write_index; u32 priv_read_index; + u32 cached_read_index; }; /* @@ -180,6 +181,19 @@ static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi) return write; } +static inline u32 hv_get_cached_bytes_to_write( + const struct hv_ring_buffer_info *rbi) +{ + u32 read_loc, write_loc, dsize, write; + + dsize = rbi->ring_datasize; + read_loc = rbi->cached_read_index; + write_loc = rbi->ring_buffer->write_index; + + write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + return write; +} /* * VMBUS version is 32 bit entity broken up into * two 16 bit quantities: major_number. minor_number. @@ -1488,7 +1502,7 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) static inline void hv_signal_on_read(struct vmbus_channel *channel) { - u32 cur_write_sz; + u32 cur_write_sz, cached_write_sz; u32 pending_sz; struct hv_ring_buffer_info *rbi = &channel->inbound; @@ -1512,12 +1526,24 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) cur_write_sz = hv_get_bytes_to_write(rbi); - if (cur_write_sz >= pending_sz) + if (cur_write_sz < pending_sz) + return; + + cached_write_sz = hv_get_cached_bytes_to_write(rbi); + if (cached_write_sz < pending_sz) vmbus_setevent(channel); return; } +static inline void +init_cached_read_index(struct vmbus_channel *channel) +{ + struct hv_ring_buffer_info *rbi = &channel->inbound; + + rbi->cached_read_index = rbi->ring_buffer->read_index; +} + /* * An API to support in-place processing of incoming VMBUS packets. */ @@ -1569,6 +1595,8 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, * This call commits the read index and potentially signals the host. * Here is the pattern for using the "in-place" consumption APIs: * + * init_cached_read_index(); + * * while (get_next_pkt_raw() { * process the packet "in-place"; * put_pkt_raw(); -- cgit v1.2.3 From a1656454131880980bc3a5313c8bf66ef5990c91 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Sat, 28 Jan 2017 12:37:17 -0700 Subject: Drivers: hv: vmbus: Use all supported IC versions to negotiate Previously, we were assuming that each IC protocol version was tied to a specific host version. For example, some Windows 10 preview hosts only support v3 TimeSync even though driver assumes v4 is supported by all Windows 10 hosts. The guest will stop trying to negotiate even though older supported versions may still be offered by the host. Make IC version negotiation more robust by going through all versions that are supported by the guest. Fixes: 3da0401b4d0e ("Drivers: hv: utils: Fix the mapping between host version and protocol to use") Reported-by: Rolf Neugebauer Signed-off-by: Alex Ng Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 7ea20bd7cdd1..85b26f06e172 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1445,9 +1445,10 @@ struct hyperv_service_callback { }; #define MAX_SRV_VER 0x7ffffff -extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, - struct icmsg_negotiate *, u8 *, int, - int); +extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, + const int *fw_version, int fw_vercnt, + const int *srv_version, int srv_vercnt, + int *nego_fw_version, int *nego_srv_version); void hv_event_tasklet_disable(struct vmbus_channel *channel); void hv_event_tasklet_enable(struct vmbus_channel *channel); -- cgit v1.2.3 From aaa59306b0b7e0ca4ba92cc04c5db101cbb1c096 Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Mon, 30 Jan 2017 09:39:52 -0800 Subject: iommu/vt-d: Fix some macros that are incorrectly specified in intel-iommu Some of the macros are incorrect with wrong bit-shifts resulting in picking the incorrect invalidation granularity. Incorrect Source-ID in extended devtlb invalidation caused device side errors. To: Joerg Roedel To: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Cc: CQ Tang Cc: Ashok Raj Fixes: 2f26e0a9 ("iommu/vt-d: Add basic SVM PASID support") Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Tested-by: CQ Tang Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d49e26c6cdc7..23e129ef6726 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -153,8 +153,8 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) #define DMA_TLB_DSI_FLUSH (((u64)2) << 60) #define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 7) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_IIRG(type) ((type >> 60) & 3) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 3) #define DMA_TLB_READ_DRAIN (((u64)1) << 49) #define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) #define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) @@ -164,9 +164,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* INVALID_DESC */ #define DMA_CCMD_INVL_GRANU_OFFSET 61 -#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3) -#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3) -#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3) +#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4) +#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4) +#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4) #define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7) #define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6) #define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16))) @@ -316,8 +316,8 @@ enum { #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) #define QI_DEV_EIOTLB_GLOB(g) ((u64)g) #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) -#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) -#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) +#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -- cgit v1.2.3 From ade69e2432b795c76653e1dfa09c684549826a50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:09 +0100 Subject: lightnvm: merge gennvm with core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the first iteration of Open-Channel SSDs, it was anticipated that there could be various media managers on top of an open-channel SSD, such to allow vendors to plug in their own host-side FTLs, without the media manager in between. Now that an Open-Channel SSD is exposed as a traditional block device, there is no longer a need for this. Therefore lets merge the gennvm code with core and simplify the stack. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 97 ++++-------------------------------------------- 1 file changed, 7 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7c273bbc5351..84309fe27472 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -80,8 +80,6 @@ struct nvm_dev_ops { unsigned int max_phys_sect; }; - - #ifdef CONFIG_NVM #include @@ -272,15 +270,6 @@ enum { NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; -/* system block cpu representation */ -struct nvm_sb_info { - unsigned long seqnr; - unsigned long erase_cnt; - unsigned int version; - char mmtype[NVM_MMTYPE_LEN]; - struct ppa_addr fs_ppa; -}; - /* Device generic information */ struct nvm_geo { int nr_chnls; @@ -308,6 +297,7 @@ struct nvm_geo { int sec_per_lun; }; +/* sub-device structure */ struct nvm_tgt_dev { /* Device information */ struct nvm_geo geo; @@ -329,17 +319,10 @@ struct nvm_dev { struct list_head devices; - /* Media manager */ - struct nvmm_type *mt; - void *mp; - - /* System blocks */ - struct nvm_sb_info sb; - /* Device information */ struct nvm_geo geo; - /* lower page table */ + /* lower page table */ int lps_per_blk; int *lptbl; @@ -359,6 +342,10 @@ struct nvm_dev { struct mutex mlock; spinlock_t lock; + + /* target management */ + struct list_head area_list; + struct list_head targets; }; static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, @@ -452,11 +439,6 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) (ppa1.g.blk == ppa2.g.blk)); } -static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) -{ - return dev->lptbl[slc_pg]; -} - typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); @@ -487,49 +469,6 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); -typedef int (nvmm_register_fn)(struct nvm_dev *); -typedef void (nvmm_unregister_fn)(struct nvm_dev *); - -typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); -typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); -typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *); -typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int); -typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); -typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); -typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *, - struct ppa_addr, int); -typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int); - -enum { - TRANS_TGT_TO_DEV = 0x0, - TRANS_DEV_TO_TGT = 0x1, -}; - -struct nvmm_type { - const char *name; - unsigned int version[3]; - - nvmm_register_fn *register_mgr; - nvmm_unregister_fn *unregister_mgr; - - nvmm_create_tgt_fn *create_tgt; - nvmm_remove_tgt_fn *remove_tgt; - - nvmm_submit_io_fn *submit_io; - nvmm_erase_blk_fn *erase_blk; - - nvmm_get_area_fn *get_area; - nvmm_put_area_fn *put_area; - - nvmm_trans_ppa_fn *trans_ppa; - nvmm_part_to_tgt_fn *part_to_tgt; - - struct list_head list; -}; - -extern int nvm_register_mgr(struct nvmm_type *); -extern void nvm_unregister_mgr(struct nvmm_type *); - extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); @@ -559,31 +498,9 @@ extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -/* sysblk.c */ -#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ - -/* system block on disk representation */ -struct nvm_system_block { - __be32 magic; /* magic signature */ - __be32 seqnr; /* sequence number */ - __be32 erase_cnt; /* erase count */ - __be16 version; /* version number */ - u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */ - __be64 fs_ppa; /* PPA for media manager - * superblock */ -}; - -extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *); -extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *); -extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); - extern int nvm_dev_factory(struct nvm_dev *, int flags); -#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ - for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \ - (chid)++, (ppa).g.ch = (chid)) \ - for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \ - (lunid)++, (ppa).g.lun = (lunid)) +extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); #else /* CONFIG_NVM */ struct nvm_dev_ops; -- cgit v1.2.3 From 10995c3dc9d7f47b92ff3e74b4bd191ddb7991ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:10 +0100 Subject: lightnvm: collapse nvm_erase_ppa and nvm_erase_blk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After gennvm and core have been merged, there are no more callers to nvm_erase_ppa. Therefore collapse the device specific and target specific erase functions. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 84309fe27472..f2007b2c4979 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -483,7 +483,6 @@ extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); -- cgit v1.2.3 From 583b7058b2e8071f59646c8fb027f6c3597417ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:11 +0100 Subject: lightnvm: remove nvm_submit_ppa* functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nvm_submit_ppa* functions are no longer needed after gennvm and core have been merged. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index f2007b2c4979..abb3d55c107f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -489,10 +489,6 @@ extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); extern void nvm_end_io(struct nvm_rq *, int); -extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, - void *, int); -extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, - int, void *, int); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -- cgit v1.2.3 From 8f4fe008fb256649bd0e16c96a6eafa3bd916ac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:12 +0100 Subject: lightnvm: remove nvm_get_bb_tbl and nvm_set_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the merge of gennvm and core, there is no longer a need for the device specific bad block functions. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index abb3d55c107f..cad1e1cb0635 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -473,7 +473,6 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); -extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int); extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); @@ -490,7 +489,6 @@ extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); -extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); extern int nvm_dev_factory(struct nvm_dev *, int flags); -- cgit v1.2.3 From dab8ee9e8a30620a5b5f22d6c0b3749217093803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:14 +0100 Subject: lightnvm: cleanup nvm transformation functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Going from target specific ppa addresses to device was accomplished by first converting target to generic ppa addresses and generic to device addresses. The conversion was either open-coded or used the built-in nvm_trans_* and nvm_map_* functions for conversion. Simplify the interface and cleanup the calls to provide clean functions that now either take a list of ppas or a nvm_rq, and is exposed through: void nvm_ppa_* - target to/from device with a list of PPAs, void nvm_rq_* - target to/from device with a nvm_rq. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cad1e1cb0635..faa0fbfe339a 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -378,10 +378,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, return l; } -static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, - struct ppa_addr r) +static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr r) { - struct nvm_geo *geo = &dev->geo; + struct nvm_geo *geo = &tgt_dev->geo; struct ppa_addr l; l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; @@ -394,10 +394,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, return l; } -static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, - struct ppa_addr r) +static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, + struct ppa_addr r) { - struct nvm_geo *geo = &dev->geo; + struct nvm_geo *geo = &tgt_dev->geo; struct ppa_addr l; l.ppa = 0; @@ -477,8 +477,6 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); -extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); -extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -- cgit v1.2.3 From 19bd6fe73ca812964963aa30827cff9aae64a715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:15 +0100 Subject: lightnvm: reduce number of nvm_id groups to one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of configuration groups has been limited to one in current code, even if there is support for up to four. With the introduction of the open-channel SSD 1.3 specification, only a single group is exposed onwards. Reflect this in the nvm_id structure. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index faa0fbfe339a..ce0b2dac84ac 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -200,11 +200,10 @@ struct nvm_addr_format { struct nvm_id { u8 ver_id; u8 vmnt; - u8 cgrps; u32 cap; u32 dom; struct nvm_addr_format ppaf; - struct nvm_id_group groups[4]; + struct nvm_id_group grp; } __packed; struct nvm_target { -- cgit v1.2.3 From 06894efea706b3cd4ce31e341ec51b4c62c34a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 31 Jan 2017 13:17:17 +0100 Subject: lightnvm: use end_io callback instead of instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the lightnvm core had the "gennvm" layer between the device and the target, there was a need for the core to be able to figure out which target it should send an end_io callback to. Leading to a "double" end_io, first for the media manager instance, and then for the target instance. Now that core and gennvm is merged, there is no longer a need for this, and a single end_io callback will do. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ce0b2dac84ac..17cd454f0d87 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -213,10 +213,6 @@ struct nvm_target { struct gendisk *disk; }; -struct nvm_tgt_instance { - struct nvm_tgt_type *tt; -}; - #define ADDR_EMPTY (~0ULL) #define NVM_VERSION_MAJOR 1 @@ -227,7 +223,6 @@ struct nvm_rq; typedef void (nvm_end_io_fn)(struct nvm_rq *); struct nvm_rq { - struct nvm_tgt_instance *ins; struct nvm_tgt_dev *dev; struct bio *bio; @@ -251,6 +246,8 @@ struct nvm_rq { u64 ppa_status; /* ppa media status */ int error; + + void *private; }; static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) @@ -450,7 +447,6 @@ struct nvm_tgt_type { /* target entry points */ nvm_tgt_make_rq_fn *make_rq; nvm_tgt_capacity_fn *capacity; - nvm_end_io_fn *end_io; /* module-specific init/teardown */ nvm_tgt_init_fn *init; @@ -484,7 +480,7 @@ extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); -extern void nvm_end_io(struct nvm_rq *, int); +extern void nvm_end_io(struct nvm_rq *); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -- cgit v1.2.3 From 38ea2f7656f815e7330868cbec7bada0fd7933a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Tue, 31 Jan 2017 13:17:18 +0100 Subject: lightnvm: Add CRC read error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let the host differentiate between a read error and a CRC check error on the device side. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 17cd454f0d87..bc282d26017a 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -107,6 +107,7 @@ enum { NVM_RSP_ERR_FAILWRITE = 0x40ff, NVM_RSP_ERR_EMPTYPAGE = 0x42ff, NVM_RSP_ERR_FAILECC = 0x4281, + NVM_RSP_ERR_FAILCRC = 0x4004, NVM_RSP_WARN_HIGHECC = 0x4700, /* Device opcodes */ -- cgit v1.2.3 From 9a69b0ed6257ae5e71c99bf21ce53f98c558476a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Tue, 31 Jan 2017 13:17:20 +0100 Subject: lightnvm: allow targets to use sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to register through the sysfs interface, a driver needs to know its kobject. On a disk structure, this happens when the partition information is added (device_add_disk), which for lightnvm takes place after the target has been initialized. This means that on target initialization, the kboject has not been created yet. This patch adds a target function to let targets initialize their own kboject as a child of the disk kobject. Signed-off-by: Javier González Added exit typedef and passed gendisk instead of void pointer for exit. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index bc282d26017a..ca45e4a088a9 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -440,6 +440,8 @@ typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); typedef void (nvm_tgt_exit_fn)(void *); +typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); +typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); struct nvm_tgt_type { const char *name; @@ -453,6 +455,10 @@ struct nvm_tgt_type { nvm_tgt_init_fn *init; nvm_tgt_exit_fn *exit; + /* sysfs */ + nvm_tgt_sysfs_init_fn *sysfs_init; + nvm_tgt_sysfs_exit_fn *sysfs_exit; + /* For internal use */ struct list_head list; }; -- cgit v1.2.3 From 2b477c00f3bd87c3286f5940cb4174d8b01ee0d5 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 22 Dec 2016 12:38:06 -0500 Subject: svcrpc: free contexts immediately on PROC_DESTROY We currently handle a client PROC_DESTROY request by turning it CACHE_NEGATIVE, setting the expired time to now, and then waiting for cache_clean to clean it up later. Since we forgot to set the cache's nextcheck value, that could take up to 30 minutes. Also, though there's probably no real bug in this case, setting CACHE_NEGATIVE directly like this probably isn't a great idea in general. So let's just remove the entry from the cache directly, and move this bit of cache manipulation to a helper function. Signed-off-by: Neil Brown Reported-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..9dcf2c8fe1c5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -227,6 +227,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ extern void *cache_seq_start(struct seq_file *file, loff_t *pos); -- cgit v1.2.3 From e26bfebdfc0d212d366de9990a096665d5c0209a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 09:45:28 +0000 Subject: fscache: Fix dead object requeue Under some circumstances, an fscache object can become queued such that it fscache_object_work_func() can be called once the object is in the OBJECT_DEAD state. This results in the kernel oopsing when it tries to invoke the handler for the state (which is hard coded to 0x2). The way this comes about is something like the following: (1) The object dispatcher is processing a work state for an object. This is done in workqueue context. (2) An out-of-band event comes in that isn't masked, causing the object to be queued, say EV_KILL. (3) The object dispatcher finishes processing the current work state on that object and then sees there's another event to process, so, without returning to the workqueue core, it processes that event too. It then follows the chain of events that initiates until we reach OBJECT_DEAD without going through a wait state (such as WAIT_FOR_CLEARANCE). At this point, object->events may be 0, object->event_mask will be 0 and oob_event_mask will be 0. (4) The object dispatcher returns to the workqueue processor, and in due course, this sees that the object's work item is still queued and invokes it again. (5) The current state is a work state (OBJECT_DEAD), so the dispatcher jumps to it - resulting in an OOPS. When I'm seeing this, the work state in (1) appears to have been either LOOK_UP_OBJECT or CREATE_OBJECT (object->oob_table is fscache_osm_lookup_oob). The window for (2) is very small: (A) object->event_mask is cleared whilst the event dispatch process is underway - though there's no memory barrier to force this to the top of the function. The window, therefore is from the time the object was selected by the workqueue processor and made requeueable to the time the mask was cleared. (B) fscache_raise_event() will only queue the object if it manages to set the event bit and the corresponding event_mask bit was set. The enqueuement is then deferred slightly whilst we get a ref on the object and get the per-CPU variable for workqueue congestion. This slight deferral slightly increases the probability by allowing extra time for the workqueue to make the item requeueable. Handle this by giving the dead state a processor function and checking the for the dead state address rather than seeing if the processor function is address 0x2. The dead state processor function can then set a flag to indicate that it's occurred and give a warning if it occurs more than once per object. If this race occurs, an oops similar to the following is seen (note the RIP value): BUG: unable to handle kernel NULL pointer dereference at 0000000000000002 IP: [<0000000000000002>] 0x1 PGD 0 Oops: 0010 [#1] SMP Modules linked in: ... CPU: 17 PID: 16077 Comm: kworker/u48:9 Not tainted 3.10.0-327.18.2.el7.x86_64 #1 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015 Workqueue: fscache_object fscache_object_work_func [fscache] task: ffff880302b63980 ti: ffff880717544000 task.ti: ffff880717544000 RIP: 0010:[<0000000000000002>] [<0000000000000002>] 0x1 RSP: 0018:ffff880717547df8 EFLAGS: 00010202 RAX: ffffffffa0368640 RBX: ffff880edf7a4480 RCX: dead000000200200 RDX: 0000000000000002 RSI: 00000000ffffffff RDI: ffff880edf7a4480 RBP: ffff880717547e18 R08: 0000000000000000 R09: dfc40a25cb3a4510 R10: dfc40a25cb3a4510 R11: 0000000000000400 R12: 0000000000000000 R13: ffff880edf7a4510 R14: ffff8817f6153400 R15: 0000000000000600 FS: 0000000000000000(0000) GS:ffff88181f420000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000002 CR3: 000000000194a000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0363695 ffff880edf7a4510 ffff88093f16f900 ffff8817faa4ec00 ffff880717547e60 ffffffff8109d5db 00000000faa4ec18 0000000000000000 ffff8817faa4ec18 ffff88093f16f930 ffff880302b63980 ffff88093f16f900 Call Trace: [] ? fscache_object_work_func+0xa5/0x200 [fscache] [] process_one_work+0x17b/0x470 [] worker_thread+0x21c/0x400 [] ? rescuer_thread+0x400/0x400 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Signed-off-by: David Howells Acked-by: Jeremy McNicoll Tested-by: Frank Sorenson Tested-by: Benjamin Coddington Reviewed-by: Benjamin Coddington Signed-off-by: Al Viro --- include/linux/fscache-cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 13ba552e6c09..4c467ef50159 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -360,6 +360,7 @@ struct fscache_object { #define FSCACHE_OBJECT_IS_AVAILABLE 5 /* T if object has become active */ #define FSCACHE_OBJECT_RETIRED 6 /* T if object was retired on relinquishment */ #define FSCACHE_OBJECT_KILLED_BY_CACHE 7 /* T if object was killed by the cache */ +#define FSCACHE_OBJECT_RUN_AFTER_DEAD 8 /* T if object has been dispatched after death */ struct list_head cache_link; /* link in cache->object_list */ struct hlist_node cookie_link; /* link in cookie->backing_objects */ -- cgit v1.2.3 From 57292b58ddb58689e8c3b4c6eadbef10d9ca44dd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jan 2017 16:57:29 +0100 Subject: block: introduce blk_rq_is_passthrough This can be used to check for fs vs non-fs requests and basically removes all knowledge of BLOCK_PC specific from the block layer, as well as preparing for removing the cmd_type field in struct request. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 +++++++++++----- include/linux/blktrace_api.h | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e4c5f284fe2d..7121be081517 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -242,6 +242,11 @@ struct request { struct request *next_rq; }; +static inline bool blk_rq_is_passthrough(struct request *rq) +{ + return rq->cmd_type != REQ_TYPE_FS; +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -698,9 +703,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) -#define blk_account_rq(rq) \ - (((rq)->rq_flags & RQF_STARTED) && \ - ((rq)->cmd_type == REQ_TYPE_FS)) +static inline bool blk_account_rq(struct request *rq) +{ + return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); +} #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) @@ -775,7 +781,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) static inline bool rq_mergeable(struct request *rq) { - if (rq->cmd_type != REQ_TYPE_FS) + if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) @@ -1049,7 +1055,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, { struct request_queue *q = rq->q; - if (unlikely(rq->cmd_type != REQ_TYPE_FS)) + if (blk_rq_is_passthrough(rq)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index a143a67a6f33..341f9418bd68 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -114,12 +114,12 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_pos(rq); + return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) { - return (rq->cmd_type != REQ_TYPE_FS) ? 0 : blk_rq_sectors(rq); + return blk_rq_is_passthrough(rq) ? 0 : blk_rq_sectors(rq); } #endif -- cgit v1.2.3 From 2f5a8e80f79dc82e00f4cca557dc9ceaf064b450 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jan 2017 16:57:30 +0100 Subject: ide: don't abuse cmd_type Currently the legacy ide driver defines several request types of it's own, which is in the way of removing that field entirely. Instead add a type field to struct ide_request and use that to distinguish the different types of IDE-internal requests. It's a bit of a mess, but so is the surrounding code.. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/ide.h | 56 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 086fbe172817..5cc6caa94cac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -40,24 +40,58 @@ struct device; -/* IDE-specific values for req->cmd_type */ -enum ata_cmd_type_bits { - REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, - REQ_TYPE_ATA_PC, - REQ_TYPE_ATA_SENSE, /* sense request */ - REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ - REQ_TYPE_ATA_PM_RESUME, /* resume request */ +/* values for ide_request.type */ +enum ata_priv_type { + ATA_PRIV_MISC, + ATA_PRIV_TASKFILE, + ATA_PRIV_PC, + ATA_PRIV_SENSE, /* sense request */ + ATA_PRIV_PM_SUSPEND, /* suspend request */ + ATA_PRIV_PM_RESUME, /* resume request */ }; -#define ata_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) - struct ide_request { struct scsi_request sreq; u8 sense[SCSI_SENSE_BUFFERSIZE]; + u8 type; }; +static inline struct ide_request *ide_req(struct request *rq) +{ + return blk_mq_rq_to_pdu(rq); +} + +static inline bool ata_misc_request(struct request *rq) +{ + return rq->cmd_type == REQ_TYPE_DRV_PRIV && + ide_req(rq)->type == ATA_PRIV_MISC; +} + +static inline bool ata_taskfile_request(struct request *rq) +{ + return rq->cmd_type == REQ_TYPE_DRV_PRIV && + ide_req(rq)->type == ATA_PRIV_TASKFILE; +} + +static inline bool ata_pc_request(struct request *rq) +{ + return rq->cmd_type == REQ_TYPE_DRV_PRIV && + ide_req(rq)->type == ATA_PRIV_PC; +} + +static inline bool ata_sense_request(struct request *rq) +{ + return rq->cmd_type == REQ_TYPE_DRV_PRIV && + ide_req(rq)->type == ATA_PRIV_SENSE; +} + +static inline bool ata_pm_request(struct request *rq) +{ + return rq->cmd_type == REQ_TYPE_DRV_PRIV && + (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND || + ide_req(rq)->type == ATA_PRIV_PM_RESUME); +} + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, -- cgit v1.2.3 From aebf526b53aea164508730427597d45f3e06b376 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jan 2017 16:57:31 +0100 Subject: block: fold cmd_type into the REQ_OP_ space Instead of keeping two levels of indirection for requests types, fold it all into the operations. The little caveat here is that previously cmd_type only applied to struct request, while the request and bio op fields were set to plain REQ_OP_READ/WRITE even for passthrough operations. Instead this patch adds new REQ_OP_* for SCSI passthrough and driver private requests, althought it has to add two for each so that we can communicate the data in/out nature of the request. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 +++++++ include/linux/blkdev.h | 22 +++++++++++----------- include/linux/ide.h | 14 +++++--------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 37c9a43c5e78..d703acb55d0f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -162,6 +162,13 @@ enum req_opf { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 8, + /* SCSI passthrough using struct scsi_request */ + REQ_OP_SCSI_IN = 32, + REQ_OP_SCSI_OUT = 33, + /* Driver private requests */ + REQ_OP_DRV_IN = 34, + REQ_OP_DRV_OUT = 35, + REQ_OP_LAST, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7121be081517..1e947e725528 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -70,15 +70,6 @@ struct request_list { unsigned int flags; }; -/* - * request command types - */ -enum rq_cmd_type_bits { - REQ_TYPE_FS = 1, /* fs request */ - REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_DRV_PRIV, /* driver defined types from here */ -}; - /* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -145,7 +136,6 @@ struct request { struct blk_mq_ctx *mq_ctx; int cpu; - unsigned cmd_type; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; unsigned long atomic_flags; @@ -242,9 +232,19 @@ struct request { struct request *next_rq; }; +static inline bool blk_rq_is_scsi(struct request *rq) +{ + return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; +} + +static inline bool blk_rq_is_private(struct request *rq) +{ + return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; +} + static inline bool blk_rq_is_passthrough(struct request *rq) { - return rq->cmd_type != REQ_TYPE_FS; + return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } static inline unsigned short req_get_ioprio(struct request *req) diff --git a/include/linux/ide.h b/include/linux/ide.h index 5cc6caa94cac..2f51c1724b5a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -63,31 +63,27 @@ static inline struct ide_request *ide_req(struct request *rq) static inline bool ata_misc_request(struct request *rq) { - return rq->cmd_type == REQ_TYPE_DRV_PRIV && - ide_req(rq)->type == ATA_PRIV_MISC; + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_MISC; } static inline bool ata_taskfile_request(struct request *rq) { - return rq->cmd_type == REQ_TYPE_DRV_PRIV && - ide_req(rq)->type == ATA_PRIV_TASKFILE; + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_TASKFILE; } static inline bool ata_pc_request(struct request *rq) { - return rq->cmd_type == REQ_TYPE_DRV_PRIV && - ide_req(rq)->type == ATA_PRIV_PC; + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PC; } static inline bool ata_sense_request(struct request *rq) { - return rq->cmd_type == REQ_TYPE_DRV_PRIV && - ide_req(rq)->type == ATA_PRIV_SENSE; + return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_SENSE; } static inline bool ata_pm_request(struct request *rq) { - return rq->cmd_type == REQ_TYPE_DRV_PRIV && + return blk_rq_is_private(rq) && (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND || ide_req(rq)->type == ATA_PRIV_PM_RESUME); } -- cgit v1.2.3 From d486f1f204382557b5fbcb3ddbb5845cd4ba4e2c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 31 Jan 2017 12:34:41 -0700 Subject: block: move internal_tag to same cache line as tag Since we removed cmd_type, we now have a hole in the struct. Move the internal_tag member to the same cacheline as tag, since we use them at the same time. This doesn't fix the hole, just moves it elsewhere. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1e947e725528..11f7a8e86a89 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -138,6 +138,9 @@ struct request { int cpu; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + + int internal_tag; + unsigned long atomic_flags; /* the following two fields are internal, NEVER access directly */ @@ -209,8 +212,6 @@ struct request { unsigned short ioprio; - int internal_tag; - void *special; /* opaque pointer available for LLD use */ int errors; -- cgit v1.2.3 From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:38 +0100 Subject: perf/x86/intel/rapl: Make package handling more robust The package management code in RAPL relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left RAPL in broken state. This was not noticed because on a regular boot all CPUs are online before RAPL is initialized. A possible fix would be to reintroduce the mess which allocates a package data structure in CPU prepare and when it turns out to already exist in starting throw it away later in the CPU online callback. But that's a horrible hack and not required at all because RAPL becomes functional for perf only in the CPU online callback. That's correct because user space is not yet informed about the CPU being onlined, so nothing caan rely on RAPL being available on that particular CPU. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. This also adds a missing check for available package data in the event_init() function. Reported-by: Yasuaki Ishimatsu Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d936a0021839..8329f3dc592c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,7 +10,6 @@ enum cpuhp_state { CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, - CPUHP_PERF_X86_RAPL_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, -- cgit v1.2.3 From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:40 +0100 Subject: perf/x86/intel/uncore: Make package handling more robust The package management code in uncore relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left uncore in broken state. This was not noticed because on a regular boot all CPUs are online before uncore is initialized. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/cpuhotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8329f3dc592c..921acaaa1601 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -8,7 +8,6 @@ enum cpuhp_state { CPUHP_CREATE_THREADS, CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, - CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, @@ -85,7 +84,6 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_PERF_X86_UNCORE_STARTING, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, -- cgit v1.2.3 From db4545d9a7881db0a7e18599e6cd1adbcb93db33 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 31 Jan 2017 13:21:34 +0000 Subject: x86/efi: Deduplicate efi_char16_printk() Eliminate the separate 32-bit and 64x- bit code paths by way of the shiny new efi_call_proto() macro. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-3-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 5b1af30ece55..6642c4d9d11d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1240,17 +1240,17 @@ struct efivar_entry { bool deleting; }; -struct efi_simple_text_output_protocol_32 { +typedef struct { u32 reset; u32 output_string; u32 test_string; -}; +} efi_simple_text_output_protocol_32_t; -struct efi_simple_text_output_protocol_64 { +typedef struct { u64 reset; u64 output_string; u64 test_string; -}; +} efi_simple_text_output_protocol_64_t; struct efi_simple_text_output_protocol { void *reset; -- cgit v1.2.3 From a19ebf59e20880c87dd49b6336476307559ac5ba Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Tue, 31 Jan 2017 13:21:36 +0000 Subject: efi: Introduce the EFI_MEM_ATTR bit and set it from the memory attributes table UEFI v2.6 introduces a configuration table called EFI_MEMORY_ATTRIBUTES_TABLE which provides additional information about EFI runtime regions. Currently this table describes memory protections that may be applied to the EFI Runtime code and data regions by the kernel. Allocate a EFI_XXX bit to keep track of whether this feature is published by firmware or not. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Reviewed-by: Lee, Chun-Yi Cc: Borislav Petkov Cc: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-5-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 6642c4d9d11d..5f632bf9969d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1065,6 +1065,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_ARCH_1 7 /* First arch-specific bit */ #define EFI_DBG 8 /* Print additional debug info at runtime */ #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ +#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From c4c39c70c5fef43655019236bec8ba5e7273b868 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 31 Jan 2017 13:21:39 +0000 Subject: efi: Use typed function pointers for the runtime services table Instead of using void pointers, and casting them to correctly typed function pointers upon use, declare the runtime services pointers as function pointers using their respective prototypes, for which typedefs are already available. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-8-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 5f632bf9969d..85e9fdaa8d07 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -509,24 +509,6 @@ typedef struct { u64 query_variable_info; } efi_runtime_services_64_t; -typedef struct { - efi_table_hdr_t hdr; - void *get_time; - void *set_time; - void *get_wakeup_time; - void *set_wakeup_time; - void *set_virtual_address_map; - void *convert_pointer; - void *get_variable; - void *get_next_variable; - void *set_variable; - void *get_next_high_mono_count; - void *reset_system; - void *update_capsule; - void *query_capsule_caps; - void *query_variable_info; -} efi_runtime_services_t; - typedef efi_status_t efi_get_time_t (efi_time_t *tm, efi_time_cap_t *tc); typedef efi_status_t efi_set_time_t (efi_time_t *tm); typedef efi_status_t efi_get_wakeup_time_t (efi_bool_t *enabled, efi_bool_t *pending, @@ -561,6 +543,24 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long size, bool nonblocking); +typedef struct { + efi_table_hdr_t hdr; + efi_get_time_t *get_time; + efi_set_time_t *set_time; + efi_get_wakeup_time_t *get_wakeup_time; + efi_set_wakeup_time_t *set_wakeup_time; + efi_set_virtual_address_map_t *set_virtual_address_map; + void *convert_pointer; + efi_get_variable_t *get_variable; + efi_get_next_variable_t *get_next_variable; + efi_set_variable_t *set_variable; + efi_get_next_high_mono_count_t *get_next_high_mono_count; + efi_reset_system_t *reset_system; + efi_update_capsule_t *update_capsule; + efi_query_capsule_caps_t *query_capsule_caps; + efi_query_variable_info_t *query_variable_info; +} efi_runtime_services_t; + void efi_native_runtime_setup(void); /* -- cgit v1.2.3 From 7b0a911478c74ca02581d496f732c10e811e894f Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 31 Jan 2017 13:21:40 +0000 Subject: efi/x86: Move the EFI BGRT init code to early init code Before invoking the arch specific handler, efi_mem_reserve() reserves the given memory region through memblock. efi_bgrt_init() will call efi_mem_reserve() after mm_init(), at which time memblock is dead and should not be used anymore. The EFI BGRT code depends on ACPI initialization to get the BGRT ACPI table, so move parsing of the BGRT table to ACPI early boot code to ensure that efi_mem_reserve() in EFI BGRT code still use memblock safely. Tested-by: Bhupesh Sharma Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Cc: Len Brown Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-9-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi-bgrt.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h index 051b21fedf68..2fd3993c370b 100644 --- a/include/linux/efi-bgrt.h +++ b/include/linux/efi-bgrt.h @@ -1,20 +1,19 @@ #ifndef _LINUX_EFI_BGRT_H #define _LINUX_EFI_BGRT_H -#ifdef CONFIG_ACPI_BGRT - #include -void efi_bgrt_init(void); +#ifdef CONFIG_ACPI_BGRT + +void efi_bgrt_init(struct acpi_table_header *table); /* The BGRT data itself; only valid if bgrt_image != NULL. */ -extern void *bgrt_image; extern size_t bgrt_image_size; -extern struct acpi_table_bgrt *bgrt_tab; +extern struct acpi_table_bgrt bgrt_tab; #else /* !CONFIG_ACPI_BGRT */ -static inline void efi_bgrt_init(void) {} +static inline void efi_bgrt_init(struct acpi_table_header *table) {} #endif /* !CONFIG_ACPI_BGRT */ -- cgit v1.2.3 From 07e5f5e353aaa61696c8353d87050994a0c4648a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:17 +0100 Subject: time: Introduce jiffies64_to_nsecs() This will be needed for the cputime_t to nsec conversion. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Tony Luck Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/jiffies.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 589d14e970ad..624215cebee5 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -293,6 +293,8 @@ static inline u64 jiffies_to_nsecs(const unsigned long j) return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; } +extern u64 jiffies64_to_nsecs(u64 j); + extern unsigned long __msecs_to_jiffies(const unsigned int m); #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) /* -- cgit v1.2.3 From ba03ce822db234f8acb559de4a317a5c1f95c029 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:18 +0100 Subject: sched/cputime: Remove the unused INIT_CPUTIME macro It's a leftover from removed code. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Tony Luck Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 69e6852fede1..5f60aed37701 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -620,13 +620,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#define INIT_CPUTIME \ - (struct task_cputime) { \ - .utime = 0, \ - .stime = 0, \ - .sum_exec_runtime = 0, \ - } - /* * This is the atomic variant of task_cputime, which can be used for * storing and updating task_cputime statistics without locking. -- cgit v1.2.3 From 16a6d9be90373fb0b521850cd0185a4d460dd152 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:21 +0100 Subject: sched/cputime: Convert guest time accounting to nsecs (u64) cputime_t is being obsolete and replaced by nsecs units in order to make internal timestamps less opaque and more granular. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Tony Luck Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f60aed37701..252ff25983c8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -774,8 +774,8 @@ struct signal_struct { */ seqlock_t stats_lock; cputime_t utime, stime, cutime, cstime; - cputime_t gtime; - cputime_t cgtime; + u64 gtime; + u64 cgtime; struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; @@ -1658,7 +1658,7 @@ struct task_struct { #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME cputime_t utimescaled, stimescaled; #endif - cputime_t gtime; + u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_t vtime_seqcount; @@ -2254,7 +2254,7 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime); -extern cputime_t task_gtime(struct task_struct *t); +extern u64 task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) @@ -2263,7 +2263,7 @@ static inline void task_cputime(struct task_struct *t, *stime = t->stime; } -static inline cputime_t task_gtime(struct task_struct *t) +static inline u64 task_gtime(struct task_struct *t) { return t->gtime; } -- cgit v1.2.3 From a1cecf2ba78e0a6de00ff99df34b662728535aa5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:22 +0100 Subject: sched/cputime: Introduce special task_cputime_t() API to return old-typed cputime This API returns a task's cputime in cputime_t in order to ease the conversion of cputime internals to use nsecs units instead. Blindly converting all cputime readers to use this API now will later let us convert more smoothly and step by step all these places to use the new nsec based cputime. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-7-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 252ff25983c8..9cc722f77799 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -615,6 +615,13 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; +/* Temporary type to ease cputime_t to nsecs conversion */ +struct task_cputime_t { + cputime_t utime; + cputime_t stime; + unsigned long long sum_exec_runtime; +}; + /* Alternate field names when used to cache expirations. */ #define virt_exp utime #define prof_exp stime @@ -748,7 +755,7 @@ struct signal_struct { struct thread_group_cputimer cputimer; /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; + struct task_cputime_t cputime_expires; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; @@ -1682,7 +1689,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - struct task_cputime cputime_expires; + struct task_cputime_t cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -2286,6 +2293,19 @@ static inline void task_cputime_scaled(struct task_struct *t, } #endif +static inline void task_cputime_t(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + task_cputime(t, utime, stime); +} + +static inline void task_cputime_t_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + task_cputime_scaled(t, utimescaled, stimescaled); +} + extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); @@ -3499,7 +3519,13 @@ static __always_inline bool need_resched(void) * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); + +static inline void thread_group_cputime_t(struct task_struct *tsk, + struct task_cputime_t *times) +{ + thread_group_cputime(tsk, (struct task_cputime *)times); +} /* * Reevaluate whether the task has signals pending delivery. -- cgit v1.2.3 From 5613fda9a503cd6137b120298902a34a1386b2c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:23 +0100 Subject: sched/cputime: Convert task/group cputime to nsecs Now that most cputime readers use the transition API which return the task cputime in old style cputime_t, we can safely store the cputime in nsecs. This will eventually make cputime statistics less opaque and more granular. Back and forth convertions between cputime_t and nsecs in order to deal with cputime_t random granularity won't be needed anymore. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-8-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 55 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9cc722f77799..b7ccc54b35cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -585,8 +585,8 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; raw_spinlock_t lock; #endif }; @@ -601,8 +601,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) /** * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in &cputime_t units - * @stime: time spent in kernel mode, in &cputime_t units + * @utime: time spent in user mode, in nanoseconds + * @stime: time spent in kernel mode, in nanoseconds * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * * This structure groups together three kinds of CPU time that are tracked for @@ -610,8 +610,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; unsigned long long sum_exec_runtime; }; @@ -780,7 +780,7 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; - cputime_t utime, stime, cutime, cstime; + u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; @@ -1661,9 +1661,9 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - cputime_t utime, stime; + u64 utime, stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - cputime_t utimescaled, stimescaled; + u64 utimescaled, stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; @@ -2260,11 +2260,11 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime); + u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime) + u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; @@ -2278,16 +2278,16 @@ static inline u64 task_gtime(struct task_struct *t) #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { *utimescaled = t->utimescaled; *stimescaled = t->stimescaled; } #else static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { task_cputime(t, utimescaled, stimescaled); } @@ -2296,18 +2296,26 @@ static inline void task_cputime_scaled(struct task_struct *t, static inline void task_cputime_t(struct task_struct *t, cputime_t *utime, cputime_t *stime) { - task_cputime(t, utime, stime); + u64 ut, st; + + task_cputime(t, &ut, &st); + *utime = nsecs_to_cputime(ut); + *stime = nsecs_to_cputime(st); } static inline void task_cputime_t_scaled(struct task_struct *t, cputime_t *utimescaled, cputime_t *stimescaled) { - task_cputime_scaled(t, utimescaled, stimescaled); + u64 ut, st; + + task_cputime_scaled(t, &ut, &st); + *utimescaled = nsecs_to_cputime(ut); + *stimescaled = nsecs_to_cputime(st); } -extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); /* * Per process flags @@ -3522,9 +3530,14 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); static inline void thread_group_cputime_t(struct task_struct *tsk, - struct task_cputime_t *times) + struct task_cputime_t *cputime) { - thread_group_cputime(tsk, (struct task_cputime *)times); + struct task_cputime times; + + thread_group_cputime(tsk, ×); + cputime->utime = nsecs_to_cputime(times.utime); + cputime->stime = nsecs_to_cputime(times.stime); + cputime->sum_exec_runtime = times.sum_exec_runtime; } /* -- cgit v1.2.3 From cd19c364b313c179410fcac8376330964cc9bfd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:27 +0100 Subject: fs/binfmt: Convert obsolete cputime type to nsecs Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-12-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/compat.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 63609398ef9f..9e40be522793 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -731,7 +731,25 @@ asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, static inline bool in_compat_syscall(void) { return is_compat_task(); } #endif -#else +/** + * ns_to_compat_timeval - Compat version of ns_to_timeval + * @nsec: the nanoseconds value to be converted + * + * Returns the compat_timeval representation of the nsec parameter. + */ +static inline struct compat_timeval ns_to_compat_timeval(s64 nsec) +{ + struct timeval tv; + struct compat_timeval ctv; + + tv = ns_to_timeval(nsec); + ctv.tv_sec = tv.tv_sec; + ctv.tv_usec = tv.tv_usec; + + return ctv; +} + +#else /* !CONFIG_COMPAT */ #define is_compat_task() (0) static inline bool in_compat_syscall(void) { return false; } -- cgit v1.2.3 From d4bc42af73bf297f7182ed978b19850553242195 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:28 +0100 Subject: acct: Convert obsolete cputime type to nsecs Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-13-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b7ccc54b35cc..75fc773c158d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -563,7 +563,7 @@ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; - cputime_t ac_utime, ac_stime; + u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; }; -- cgit v1.2.3 From 605dc2b31a2ab235570107cb650036b41e741165 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:30 +0100 Subject: tsacct: Convert obsolete cputime type to nsecs Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-15-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 75fc773c158d..dc44366128d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1815,7 +1815,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_timexpd; /* stime + utime since last update */ + u64 acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; /* Protected by alloc_lock */ -- cgit v1.2.3 From ebd7e7fc4bc63be5eaf9da903b8060b02dd711ea Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:34 +0100 Subject: timers/posix-timers: Convert internals to use nsecs Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Also convert posix-cpu-timers to use nsec based internal counters to simplify it. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-19-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/posix-timers.h | 12 +----------- include/linux/sched.h | 6 +++--- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 62d44c176071..890de52362d0 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -8,19 +8,9 @@ #include -static inline unsigned long long cputime_to_expires(cputime_t expires) -{ - return (__force unsigned long long)expires; -} - -static inline cputime_t expires_to_cputime(unsigned long long expires) -{ - return (__force cputime_t)expires; -} - struct cpu_timer_list { struct list_head entry; - unsigned long long expires, incr; + u64 expires, incr; struct task_struct *task; int firing; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index dc44366128d8..baa6a2834e0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -755,7 +755,7 @@ struct signal_struct { struct thread_group_cputimer cputimer; /* Earliest-expiration cache. */ - struct task_cputime_t cputime_expires; + struct task_cputime cputime_expires; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; @@ -1689,7 +1689,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - struct task_cputime_t cputime_expires; + struct task_cputime cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -3527,7 +3527,7 @@ static __always_inline bool need_resched(void) * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); static inline void thread_group_cputime_t(struct task_struct *tsk, struct task_cputime_t *cputime) -- cgit v1.2.3 From 858cf3a8c59968e7c5f7c1a1192459a0d52d1ab4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:35 +0100 Subject: timers/itimer: Convert internal cputime_t units to nsec Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Also convert itimers to use nsec based internal counters. This simplifies it and removes the whole game with error/inc_error which served to deal with cputime_t random granularity. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-20-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/posix-timers.h | 2 +- include/linux/sched.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 890de52362d0..64aa189efe21 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -119,7 +119,7 @@ void run_posix_cpu_timers(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, - cputime_t *newval, cputime_t *oldval); + u64 *newval, u64 *oldval); long clock_nanosleep_restart(struct restart_block *restart_block); diff --git a/include/linux/sched.h b/include/linux/sched.h index baa6a2834e0f..268fdd713089 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -568,10 +568,8 @@ struct pacct_struct { }; struct cpu_itimer { - cputime_t expires; - cputime_t incr; - u32 error; - u32 incr_error; + u64 expires; + u64 incr; }; /** -- cgit v1.2.3 From 71ea47b197de9a53bc3747a8b1c667df9c6a4c68 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:36 +0100 Subject: sched/cputime: Remove temporary cputime_t accessors Now that the whole cputime conversion to nsec units is complete, we can remove the compatibility accessors. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-21-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 40 ---------------------------------------- 1 file changed, 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 268fdd713089..d17645402767 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -29,7 +29,6 @@ struct sched_param { #include #include -#include #include #include @@ -613,13 +612,6 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; -/* Temporary type to ease cputime_t to nsecs conversion */ -struct task_cputime_t { - cputime_t utime; - cputime_t stime; - unsigned long long sum_exec_runtime; -}; - /* Alternate field names when used to cache expirations. */ #define virt_exp utime #define prof_exp stime @@ -2291,27 +2283,6 @@ static inline void task_cputime_scaled(struct task_struct *t, } #endif -static inline void task_cputime_t(struct task_struct *t, - cputime_t *utime, cputime_t *stime) -{ - u64 ut, st; - - task_cputime(t, &ut, &st); - *utime = nsecs_to_cputime(ut); - *stime = nsecs_to_cputime(st); -} - -static inline void task_cputime_t_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) -{ - u64 ut, st; - - task_cputime_scaled(t, &ut, &st); - *utimescaled = nsecs_to_cputime(ut); - *stimescaled = nsecs_to_cputime(st); -} - extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); @@ -3527,17 +3498,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -static inline void thread_group_cputime_t(struct task_struct *tsk, - struct task_cputime_t *cputime) -{ - struct task_cputime times; - - thread_group_cputime(tsk, ×); - cputime->utime = nsecs_to_cputime(times.utime); - cputime->stime = nsecs_to_cputime(times.stime); - cputime->sum_exec_runtime = times.sum_exec_runtime; -} - /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. -- cgit v1.2.3 From 23244a5c8003d4154161a8289a7d3783b0237c08 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:37 +0100 Subject: sched/cputime: Push time to account_user_time() in nsecs This is one more step toward converting cputime accounting to pure nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-22-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c3e38ded2d73..b716001ac23e 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -78,7 +78,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } -extern void account_user_time(struct task_struct *, cputime_t); +extern void account_user_time(struct task_struct *, u64); extern void account_guest_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_system_index_time(struct task_struct *, cputime_t, -- cgit v1.2.3 From be9095ed4fb3cf69e9fdf64e28ff6b5bd0ec7215 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:38 +0100 Subject: sched/cputime: Push time to account_steal_time() in nsecs This is one more step toward converting cputime accounting to pure nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-23-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b716001ac23e..1d55d10abf9d 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -83,7 +83,7 @@ extern void account_guest_time(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_system_index_time(struct task_struct *, cputime_t, enum cpu_usage_stat); -extern void account_steal_time(cputime_t); +extern void account_steal_time(u64); extern void account_idle_time(cputime_t); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -- cgit v1.2.3 From 18b43a9bd7ae91185e398dd983fb4fffb9e81b3a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:39 +0100 Subject: sched/cputime: Push time to account_idle_time() in nsecs This is one more step toward converting cputime accounting to pure nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-24-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 1d55d10abf9d..e1cd8970e096 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -84,7 +84,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t); extern void account_system_index_time(struct task_struct *, cputime_t, enum cpu_usage_stat); extern void account_steal_time(u64); -extern void account_idle_time(cputime_t); +extern void account_idle_time(u64); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) -- cgit v1.2.3 From fb8b049c988f1ff460b063b8a41ea9a3c79921c2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:40 +0100 Subject: sched/cputime: Push time to account_system_time() in nsecs This is one more step toward converting cputime accounting to pure nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-25-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e1cd8970e096..66be8b6beceb 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -9,7 +9,6 @@ #include #include #include -#include /* * 'kernel_stat.h' contains the definitions needed for doing @@ -79,9 +78,9 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) } extern void account_user_time(struct task_struct *, u64); -extern void account_guest_time(struct task_struct *, cputime_t); -extern void account_system_time(struct task_struct *, int, cputime_t); -extern void account_system_index_time(struct task_struct *, cputime_t, +extern void account_guest_time(struct task_struct *, u64); +extern void account_system_time(struct task_struct *, int, u64); +extern void account_system_index_time(struct task_struct *, u64, enum cpu_usage_stat); extern void account_steal_time(u64); extern void account_idle_time(u64); -- cgit v1.2.3 From 934ad473552a48d303a54279518aa19cf674567d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:50 +0100 Subject: sched/cputime: Remove unused nsec_to_cputime() It's unused now. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-35-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/cputime.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cputime.h b/include/linux/cputime.h index f2eb2ee535ca..a257d6690621 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -8,9 +8,4 @@ (cputime_to_usecs(__ct) * NSEC_PER_USEC) #endif -#ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) \ - usecs_to_cputime((__nsecs) / NSEC_PER_USEC) -#endif - #endif /* __LINUX_CPUTIME_H */ -- cgit v1.2.3 From b672592f022152155fde7db99aafbcf04a2c3ba5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:51 +0100 Subject: sched/cputime: Remove generic asm headers cputime_t is now only used by two architectures: * powerpc (when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y) * s390 And since the core doesn't use it anymore, we don't need any arch support from the others. So we can remove their stub implementations. A final cleanup would be to provide an efficient pure arch implementation of cputime_to_nsec() for s390 and powerpc and finally remove include/linux/cputime.h . Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-36-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/cputime.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cputime.h b/include/linux/cputime.h index a257d6690621..a691dc4ddc13 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -1,6 +1,7 @@ #ifndef __LINUX_CPUTIME_H #define __LINUX_CPUTIME_H +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #ifndef cputime_to_nsecs @@ -8,4 +9,5 @@ (cputime_to_usecs(__ct) * NSEC_PER_USEC) #endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __LINUX_CPUTIME_H */ -- cgit v1.2.3 From 7e1f9467d1e48c64c27d6a32de1bfd1b9cdb1002 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 29 Jan 2017 07:42:12 -0800 Subject: sched/wait, rcuwait: Fix typo in comment Forgot to update the comment after renaming the call. Signed-off-by: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1485704532-9290-1-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/rcuwait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 0e93d56c7ab2..a4ede51b3e7c 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -47,7 +47,7 @@ extern void rcuwait_wake_up(struct rcuwait *w); for (;;) { \ /* \ * Implicit barrier (A) pairs with (B) in \ - * rcuwait_trywake(). \ + * rcuwait_wake_up(). \ */ \ set_current_state(TASK_UNINTERRUPTIBLE); \ if (condition) \ -- cgit v1.2.3 From 0754445d71c37a7afd4f0790a9be4cf53c1b8cc4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 29 Jan 2017 07:15:31 -0800 Subject: sched/wake_q: Clarify queue reinit comment As of: bcc9a76d5ac ("locking/rwsem: Reinit wake_q after use") the comment regarding the list reinitialization no longer applies, update it with the new wake_q_init() helper. Signed-off-by: Davidlohr Bueso Acked-by: Waiman Long Cc: peterz@infradead.org Cc: longman@redhat.com Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170129151531.GA2444@linux-80c1.suse Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4e62b378bd65..1cc0dede2122 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1000,8 +1000,8 @@ enum cpu_idle_type { * * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function, where the fact that the queue is - * not used again will be easy to see by inspection. + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). * * Note that this can cause spurious wakeups. schedule() callers * must ensure the call is done inside a loop, confirming that the -- cgit v1.2.3 From 733ce725aa4bfa9063be053bfe7f4597d76f0dd1 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 27 Jan 2017 12:38:16 -0500 Subject: sched/clock: Add dummy clear_sched_clock_stable() stub function In commit: acb04058de494 ("sched/clock: Fix hotplug crash") the PARISC code gained a call to this function. However the prototype for it is within a CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y #ifdef/#endif. That, combined with this: arch/parisc/Kconfig: select HAVE_UNSTABLE_SCHED_CLOCK if SMP means that PARISC can have it either enabled or disabled, resulting in the following build fail: arch/parisc/kernel/setup.c:180:2: error: implicit declaration of function 'clear_sched_clock_stable' [-Werror=implicit-function-declaration] Add a no-op stub for the non-SMP case to prevent this. Signed-off-by: Paul Gortmaker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Link: http://lkml.kernel.org/r/20170127173816.22733-1-paul.gortmaker@windriver.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d17645402767..e2ed46d3ed71 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2511,6 +2511,10 @@ static inline void sched_clock_tick(void) { } +static inline void clear_sched_clock_stable(void) +{ +} + static inline void sched_clock_idle_sleep_event(void) { } -- cgit v1.2.3 From 975e155ed8732cb81f55c021c441ae662dd040b5 Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 28 Jan 2017 22:00:49 +0800 Subject: sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds We added the 'sched_rr_timeslice_ms' SCHED_RR tuning knob in this commit: ce0dbbbb30ae ("sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice") ... which name suggests to users that it's in milliseconds, while in reality it's being set in milliseconds but the result is shown in jiffies. This is obviously confusing when HZ is not 1000, it makes it appear like the value set failed, such as HZ=100: root# echo 100 > /proc/sys/kernel/sched_rr_timeslice_ms root# cat /proc/sys/kernel/sched_rr_timeslice_ms 10 Fix this to be milliseconds all around. Signed-off-by: Shile Zhang Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1485612049-20923-1-git-send-email-shile.zhang@nokia.com Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 441145351301..49308e142aae 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -59,6 +59,7 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice; extern unsigned int sysctl_sched_autogroup_enabled; #endif +extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; extern int sched_rr_handler(struct ctl_table *table, int write, -- cgit v1.2.3 From 93faccbbfa958a9668d3ab4e30f38dd205cee8d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 1 Feb 2017 06:06:16 +1300 Subject: fs: Better permission checking for submounts To support unprivileged users mounting filesystems two permission checks have to be performed: a test to see if the user allowed to create a mount in the mount namespace, and a test to see if the user is allowed to access the specified filesystem. The automount case is special in that mounting the original filesystem grants permission to mount the sub-filesystems, to any user who happens to stumble across the their mountpoint and satisfies the ordinary filesystem permission checks. Attempting to handle the automount case by using override_creds almost works. It preserves the idea that permission to mount the original filesystem is permission to mount the sub-filesystem. Unfortunately using override_creds messes up the filesystems ordinary permission checks. Solve this by being explicit that a mount is a submount by introducing vfs_submount, and using it where appropriate. vfs_submount uses a new mount internal mount flags MS_SUBMOUNT, to let sget and friends know that a mount is a submount so they can take appropriate action. sget and sget_userns are modified to not perform any permission checks on submounts. follow_automount is modified to stop using override_creds as that has proven problemantic. do_mount is modified to always remove the new MS_SUBMOUNT flag so that we know userspace will never by able to specify it. autofs4 is modified to stop using current_real_cred that was put in there to handle the previous version of submount permission checking. cifs is modified to pass the mountpoint all of the way down to vfs_submount. debugfs is modified to pass the mountpoint all of the way down to trace_automount by adding a new parameter. To make this change easier a new typedef debugfs_automount_t is introduced to capture the type of the debugfs automount function. Cc: stable@vger.kernel.org Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using current_real_cred()->uid") Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems creds") Reviewed-by: Trond Myklebust Reviewed-by: Seth Forshee Signed-off-by: "Eric W. Biederman" --- include/linux/debugfs.h | 3 ++- include/linux/mount.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 014cc564d1c4..233006be30aa 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -97,9 +97,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data); void debugfs_remove(struct dentry *dentry); diff --git a/include/linux/mount.h b/include/linux/mount.h index c6f55158d5e5..8e0352af06b7 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -90,6 +90,9 @@ struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); +extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, + struct file_system_type *type, + const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); -- cgit v1.2.3 From 1a2a14444d32b89b28116daea86f63ced1716668 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Tue, 31 Jan 2017 16:03:13 -0800 Subject: net: fix ndo_features_check/ndo_fix_features comment ordering Commit cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()") inadvertently moved the doc comment for .ndo_fix_features instead of .ndo_features_check. Fix the comment ordering. Fixes: cdba756f5803a2 ("net: move ndo_features_check() close to ndo_start_xmit()") Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9bde9558b596..70ad0291d517 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -866,11 +866,15 @@ struct netdev_xdp { * of useless work if you return NETDEV_TX_BUSY. * Required; cannot be NULL. * - * netdev_features_t (*ndo_fix_features)(struct net_device *dev, - * netdev_features_t features); - * Adjusts the requested feature flags according to device-specific - * constraints, and returns the resulting flags. Must not modify - * the device state. + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); + * Called by core transmit path to determine if device is capable of + * performing offload operations on a given packet. This is to give + * the device an opportunity to implement any restrictions that cannot + * be otherwise expressed by feature flags. The check is called with + * the set of features that the stack has calculated and it returns + * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); @@ -1028,6 +1032,12 @@ struct netdev_xdp { * Called to release previously enslaved netdev. * * Feature/offload setting functions. + * netdev_features_t (*ndo_fix_features)(struct net_device *dev, + * netdev_features_t features); + * Adjusts the requested feature flags according to device-specific + * constraints, and returns the resulting flags. Must not modify + * the device state. + * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. Passed * feature set might be less than what was returned by ndo_fix_features()). @@ -1100,15 +1110,6 @@ struct netdev_xdp { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); - * Called by core transmit path to determine if device is capable of - * performing offload operations on a given packet. This is to give - * the device an opportunity to implement any restrictions that cannot - * be otherwise expressed by feature flags. The check is called with - * the set of features that the stack has calculated and it returns - * those the driver believes to be appropriate. * int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation of specific -- cgit v1.2.3 From cb9c68363efb6d1f950ec55fb06e031ee70db5fc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:56 +0100 Subject: skbuff: add and use skb_nfct helper Followup patch renames skb->nfct and changes its type so add a helper to avoid intrusive rename change later. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b53c0cfd417e..276431e047af 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3553,6 +3553,15 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, skb->csum = csum_add(skb->csum, delta); } +static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + return skb->nfct; +#else + return NULL; +#endif +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) @@ -3652,9 +3661,7 @@ static inline bool skb_irq_freeable(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_XFRM) !skb->sp && #endif -#if IS_ENABLED(CONFIG_NF_CONNTRACK) - !skb->nfct && -#endif + !skb_nfct(skb) && !skb->_skb_refdst && !skb_has_frag_list(skb); } -- cgit v1.2.3 From a9e419dc7be6997409dca6d1b9daf3cc7046902f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Jan 2017 18:21:59 +0100 Subject: netfilter: merge ctinfo into nfct pointer storage area After this change conntrack operations (lookup, creation, matching from ruleset) only access one instead of two sk_buff cache lines. This works for normal conntracks because those are allocated from a slab that guarantees hw cacheline or 8byte alignment (whatever is larger) so the 3 bits needed for ctinfo won't overlap with nf_conn addresses. Template allocation now does manual address alignment (see previous change) on arches that don't have sufficent kmalloc min alignment. Some spots intentionally use skb->_nfct instead of skb_nfct() helpers, this is to avoid undoing the skb_nfct() use when we remove untracked conntrack object in the future. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/skbuff.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 276431e047af..ac0bc085b139 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -585,7 +585,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header - * @nfctinfo: Relationship of this skb to the connection * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs @@ -594,7 +593,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function - * @nfct: Associated connection, if any + * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index @@ -668,7 +667,7 @@ struct sk_buff { struct sec_path *sp; #endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; + unsigned long _nfct; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info *nf_bridge; @@ -721,7 +720,6 @@ struct sk_buff { __u8 pkt_type:3; __u8 pfmemalloc:1; __u8 ignore_df:1; - __u8 nfctinfo:3; __u8 nf_trace:1; __u8 ip_summed:2; @@ -836,6 +834,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) #define SKB_DST_NOREF 1UL #define SKB_DST_PTRMASK ~(SKB_DST_NOREF) +#define SKB_NFCT_PTRMASK ~(7UL) /** * skb_dst - returns skb dst_entry * @skb: buffer @@ -3556,7 +3555,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return skb->nfct; + return (void *)(skb->_nfct & SKB_NFCT_PTRMASK); #else return NULL; #endif @@ -3590,8 +3589,8 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) static inline void nf_reset(struct sk_buff *skb) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put(skb_nfct(skb)); + skb->_nfct = 0; #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); @@ -3611,10 +3610,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - dst->nfct = src->nfct; - nf_conntrack_get(src->nfct); - if (copy) - dst->nfctinfo = src->nfctinfo; + dst->_nfct = src->_nfct; + nf_conntrack_get(skb_nfct(src)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; @@ -3629,7 +3626,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - nf_conntrack_put(dst->nfct); + nf_conntrack_put(skb_nfct(dst)); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); -- cgit v1.2.3 From f44f1ab5a2dcd4e16eab850fd08e40ff2d0c28d4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:49 +0100 Subject: block: Unhash block device inodes on gendisk destruction Currently, block device inodes stay around after corresponding gendisk hash died until memory reclaim finds them and frees them. Since we will make block device inode pin the bdi, we want to free the block device inode as soon as the device goes away so that bdi does not stay around unnecessarily. Furthermore we need to avoid issues when new device with the same major,minor pair gets created since reusing the bdi structure would be rather difficult in this case. Unhashing block device inode on gendisk destruction nicely deals with these problems. Once last block device inode reference is dropped (which may be directly in del_gendisk()), the inode gets evicted. Furthermore if the major,minor pair gets reallocated, we are guaranteed to get new block device inode even if old block device inode is not yet evicted and thus we avoid issues with possible reuse of bdi. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba074328894..702cb6c50194 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2342,6 +2342,7 @@ extern struct kmem_cache *names_cachep; #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); +extern void bdev_unhash_inode(dev_t dev); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); -- cgit v1.2.3 From dc3b17cc8bf21307c7e076e7c778d5db756f7871 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:50 +0100 Subject: block: Use pointer to backing_dev_info from request_queue We will want to have struct backing_dev_info allocated separately from struct request_queue. As the first step add pointer to backing_dev_info to request_queue and convert all users touching it. No functional changes in this patch. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 11f7a8e86a89..a75e42de34ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -432,7 +432,8 @@ struct request_queue { */ struct delayed_work delay_work; - struct backing_dev_info backing_dev_info; + struct backing_dev_info *backing_dev_info; + struct backing_dev_info _backing_dev_info; /* * The queue owner gets to use this for whatever they like. -- cgit v1.2.3 From d03f6cdc1fc422accb734c7c07a661a0018d8631 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:51 +0100 Subject: block: Dynamically allocate and refcount backing_dev_info Instead of storing backing_dev_info inside struct request_queue, allocate it dynamically, reference count it, and free it when the last reference is dropped. Currently only request_queue holds the reference but in the following patch we add other users referencing backing_dev_info. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 ++ include/linux/backing-dev.h | 10 +++++++++- include/linux/blkdev.h | 1 - 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e850e76acaaf..ad955817916d 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -10,6 +10,7 @@ #include #include #include +#include struct page; struct device; @@ -144,6 +145,7 @@ struct backing_dev_info { char *name; + struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ unsigned int min_ratio; unsigned int max_ratio, max_prop_frac; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 43b93a947e61..efb6ca992d05 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,7 +18,14 @@ #include int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_exit(struct backing_dev_info *bdi); + +static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) +{ + kref_get(&bdi->refcnt); + return bdi; +} + +void bdi_put(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -29,6 +36,7 @@ void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_destroy(struct backing_dev_info *bdi); +struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a75e42de34ab..e77c1039fd0e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -433,7 +433,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct backing_dev_info _backing_dev_info; /* * The queue owner gets to use this for whatever they like. -- cgit v1.2.3 From b1d2dc5659b41741f5a29b2ade76ffb4e5bb13d8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:52 +0100 Subject: block: Make blk_get_backing_dev_info() safe without open bdev Currenly blk_get_backing_dev_info() is not safe to be called when the block device is not open as bdev->bd_disk is NULL in that case. However inode_to_bdi() uses this function and may be call called from flusher worker or other writeback related functions without bdev being open which leads to crashes such as: [113031.075540] Unable to handle kernel paging request for data at address 0x00000000 [113031.075614] Faulting instruction address: 0xc0000000003692e0 0:mon> t [c0000000fb65f900] c00000000036cb6c writeback_sb_inodes+0x30c/0x590 [c0000000fb65fa10] c00000000036ced4 __writeback_inodes_wb+0xe4/0x150 [c0000000fb65fa70] c00000000036d33c wb_writeback+0x30c/0x450 [c0000000fb65fb40] c00000000036e198 wb_workfn+0x268/0x580 [c0000000fb65fc50] c0000000000f3470 process_one_work+0x1e0/0x590 [c0000000fb65fce0] c0000000000f38c8 worker_thread+0xa8/0x660 [c0000000fb65fd80] c0000000000fc4b0 kthread+0x110/0x130 [c0000000fb65fe30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 702cb6c50194..c930cbc19342 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -423,6 +423,7 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; + struct backing_dev_info *bd_bdi; struct list_head bd_list; /* * Private data. You must have bd_claim'ed the block_device -- cgit v1.2.3 From efa7c9f97e3ef624e9a398bf69c15f58eea9f0e8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:53 +0100 Subject: block: Get rid of blk_get_backing_dev_info() blk_get_backing_dev_info() is now a simple dereference. Remove that function and simplify some code around that. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- include/linux/blkdev.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index efb6ca992d05..c52a48cb9a66 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -191,7 +191,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) sb = inode->i_sb; #ifdef CONFIG_BLOCK if (sb_is_blkdev_sb(sb)) - return blk_get_backing_dev_info(I_BDEV(inode)); + return I_BDEV(inode)->bd_bdi; #endif return sb->s_bdi; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e77c1039fd0e..b31137e2afd0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1187,7 +1187,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { -- cgit v1.2.3 From 0dba1314d4f81115dce711292ec7981d17231064 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Feb 2017 14:05:23 -0800 Subject: scsi, block: fix duplicate bdi name registration crashes Warnings of the following form occur because scsi reuses a devt number while the block layer still has it referenced as the name of the bdi [1]: WARNING: CPU: 1 PID: 93 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x62/0x80 sysfs: cannot create duplicate filename '/devices/virtual/bdi/8:192' [..] Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 ? kernfs_path_from_node+0x4f/0x60 sysfs_warn_dup+0x62/0x80 sysfs_create_dir_ns+0x77/0x90 kobject_add_internal+0xb2/0x350 kobject_add+0x75/0xd0 device_add+0x15a/0x650 device_create_groups_vargs+0xe0/0xf0 device_create_vargs+0x1c/0x20 bdi_register+0x90/0x240 ? lockdep_init_map+0x57/0x200 bdi_register_owner+0x36/0x60 device_add_disk+0x1bb/0x4e0 ? __pm_runtime_use_autosuspend+0x5c/0x70 sd_probe_async+0x10d/0x1c0 async_run_entry_fn+0x39/0x170 This is a brute-force fix to pass the devt release information from sd_probe() to the locations where we register the bdi, device_add_disk(), and unregister the bdi, blk_cleanup_queue(). Thanks to Omar for the quick reproducer script [2]. This patch survives where an unmodified kernel fails in a few seconds. [1]: https://marc.info/?l=linux-scsi&m=147116857810716&w=4 [2]: http://marc.info/?l=linux-block&m=148554717109098&w=2 Cc: James Bottomley Cc: Bart Van Assche Cc: "Martin K. Petersen" Cc: Jan Kara Reported-by: Omar Sandoval Tested-by: Omar Sandoval Signed-off-by: Dan Williams Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + include/linux/genhd.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b31137e2afd0..f84fbe55d3b3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -433,6 +433,7 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; + struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 76f39754e7b0..a999d281a2f1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,6 +167,13 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ +struct disk_devt { + atomic_t count; + void (*release)(struct disk_devt *disk_devt); +}; + +void put_disk_devt(struct disk_devt *disk_devt); +void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -176,6 +183,7 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ + struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); -- cgit v1.2.3 From 1c83a9aab807f7452c4957b2401e1cbf43941820 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 2 Feb 2017 11:52:14 -0500 Subject: ext4: move halfmd4 into hash.c directly The "half md4" transform should not be used by any new code. And fortunately, it's only used now by ext4. Since ext4 supports several hashing methods, at some point it might be desirable to move to something like SipHash. As an intermediate step, remove half md4 from cryptohash.h and lib, and make it just a local function in ext4's hash.c. There's precedent for doing this; the other function ext can use for its hashes -- TEA -- is also implemented in the same place. Also, by being a local function, this might allow gcc to perform some additional optimizations. Signed-off-by: Jason A. Donenfeld Reviewed-by: Andreas Dilger Cc: Theodore Ts'o Signed-off-by: Theodore Ts'o --- include/linux/cryptohash.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index f4754282c9c2..3252799832cf 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -15,6 +15,4 @@ void sha_transform(__u32 *digest, const char *data, __u32 *W); void md5_transform(__u32 *hash, __u32 const *in); -__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]); - #endif -- cgit v1.2.3 From a7c5437b0bbec5165df6eb1efc5e37dc40b9e05d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 31 Jan 2017 14:53:17 -0800 Subject: debugfs: add debugfs_lookup() We don't always have easy access to the dentry of a file or directory we created in debugfs. Add a helper which allows us to get a dentry we previously created. The motivation for this change is a problem with blktrace and the blk-mq debugfs entries introduced in 07e4fead45e6 ("blk-mq: create debugfs directory tree"). Namely, in some cases, the directory that blktrace needs to create may already exist, but in other cases, it may not. We _could_ rely on a bunch of implied knowledge to decide whether to create the directory or not, but it's much cleaner on our end to just look it up. Signed-off-by: Omar Sandoval Acked-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- include/linux/debugfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 014cc564d1c4..c0befcf41b58 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -80,6 +80,8 @@ static const struct file_operations __fops = { \ #if defined(CONFIG_DEBUG_FS) +struct dentry *debugfs_lookup(const char *name, struct dentry *parent); + struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -181,6 +183,12 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, * want to duplicate the design decision mistakes of procfs and devfs again. */ +static inline struct dentry *debugfs_lookup(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) -- cgit v1.2.3 From 03796c149a99e14506db9de3adba710c26f83ba9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 31 Jan 2017 14:53:18 -0800 Subject: block: fix debugfs config conditional in struct request_queue The debugfs dentries are only used for CONFIG_BLK_DEBUG_FS, so make them conditional on that instead of CONFIG_DEBUG_FS. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f84fbe55d3b3..e0bac14347e6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -570,7 +570,7 @@ struct request_queue { struct list_head tag_set_list; struct bio_set *bio_split; -#ifdef CONFIG_DEBUG_FS +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; struct dentry *mq_debugfs_dir; #endif -- cgit v1.2.3 From a428d314ebcf65842fd64ad850c02c280586e74d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 31 Jan 2017 14:53:19 -0800 Subject: blktrace: make do_blk_trace_setup() static This isn't used outside of blktrace.c anymore. Fixes: 62c2a7d969f3 ("block: push BKL into blktrace ioctls") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 341f9418bd68..d2e908586e3d 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -30,9 +30,6 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, char *name, - dev_t dev, struct block_device *bdev, - struct blk_user_trace_setup *buts); extern __printf(2, 3) void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -80,7 +77,6 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) # define blk_add_driver_data(q, rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) -- cgit v1.2.3 From b343d77b94b6702082f4f402e1492e71ef87095b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jan 2017 13:38:22 +0100 Subject: soc: samsung: pmu: Add register defines for pad retention control Add PMU defines related to pad retention control. Will be later used by the Exynos pin controller driver. Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski --- include/linux/soc/samsung/exynos-regs-pmu.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 9786c62d7159..49df0a01a2cc 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -631,4 +631,20 @@ | EXYNOS5420_KFC_USE_STANDBY_WFI2 \ | EXYNOS5420_KFC_USE_STANDBY_WFI3) +/* For EXYNOS5433 */ +#define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) +#define EXYNOS5433_PAD_RETENTION_MMC2_OPTION (0x30C8) +#define EXYNOS5433_PAD_RETENTION_TOP_OPTION (0x3108) +#define EXYNOS5433_PAD_RETENTION_UART_OPTION (0x3128) +#define EXYNOS5433_PAD_RETENTION_MMC0_OPTION (0x3148) +#define EXYNOS5433_PAD_RETENTION_MMC1_OPTION (0x3168) +#define EXYNOS5433_PAD_RETENTION_EBIA_OPTION (0x3188) +#define EXYNOS5433_PAD_RETENTION_EBIB_OPTION (0x31A8) +#define EXYNOS5433_PAD_RETENTION_SPI_OPTION (0x31C8) +#define EXYNOS5433_PAD_RETENTION_MIF_OPTION (0x31E8) +#define EXYNOS5433_PAD_RETENTION_USBXTI_OPTION (0x3228) +#define EXYNOS5433_PAD_RETENTION_BOOTLDO_OPTION (0x3248) +#define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) +#define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) + #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ -- cgit v1.2.3 From a45463cbf3f9dcdae683033c256f50bded513d6a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Feb 2017 18:01:17 +0100 Subject: workqueue: avoid clang warning Building with clang shows lots of warning like: drivers/amba/bus.c:447:8: warning: implicit conversion from 'long long' to 'int' changes value from 4294967248 to -48 [-Wconstant-conversion] static DECLARE_DELAYED_WORK(deferred_retry_work, amba_deferred_retry_func); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:187:26: note: expanded from macro 'DECLARE_DELAYED_WORK' struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:177:10: note: expanded from macro '__DELAYED_WORK_INITIALIZER' .work = __WORK_INITIALIZER((n).work, (f)), \ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:170:10: note: expanded from macro '__WORK_INITIALIZER' .data = WORK_DATA_STATIC_INIT(), \ ^~~~~~~~~~~~~~~~~~~~~~~ include/linux/workqueue.h:111:39: note: expanded from macro 'WORK_DATA_STATIC_INIT' ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ include/asm-generic/atomic-long.h:32:41: note: expanded from macro 'ATOMIC_LONG_INIT' #define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) ~~~~~~~~~~~~^~ arch/arm/include/asm/atomic.h:21:27: note: expanded from macro 'ATOMIC_INIT' #define ATOMIC_INIT(i) { (i) } ~ ^ This makes the type cast explicit, which shuts up the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a26cc437293c..bde063cefd04 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -106,9 +106,9 @@ struct work_struct { #endif }; -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ - ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) + ATOMIC_LONG_INIT((unsigned long)(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)) struct delayed_work { struct work_struct work; -- cgit v1.2.3 From e4cf8a38fc0d257b4070066e46a678f4777fecaa Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 1 Feb 2017 03:40:06 +0100 Subject: net: phy: Marvell: Add mv88e6390 internal PHY The mv88e6390 Ethernet switch has internal PHYs. These PHYs don't have an model ID in the ID2 register. So the MDIO driver in the switch intercepts reads to this register, and returns the switch family ID. Extend the Marvell PHY driver by including this ID, and treat the PHY as a 88E1540. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index a57f0dfb6db7..3d616d7f65bf 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -19,6 +19,12 @@ #define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E3016 0x01410e60 +/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do + * not have a model ID. So the switch driver traps reads to the ID2 + * register and returns the switch family ID + */ +#define MARVELL_PHY_ID_88E6390 0x01410f90 + /* struct phy_device dev_flags definitions */ #define MARVELL_PHY_M1145_FLAGS_RESISTANCE 0x00000001 #define MARVELL_PHY_M1118_DNS323_LEDS 0x00000002 -- cgit v1.2.3 From 60f06fde4cff6f6134decbf09199b5e047bc3355 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 2 Feb 2017 00:35:03 +0100 Subject: net: phy: marvell: Add support for 88e1545 PHY The 88e1545 PHYs are discrete Marvell PHYs, found in a quad package on the zii-devel-b board. Add support for it to the Marvell PHY driver. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 3d616d7f65bf..4055cf8cc978 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -17,6 +17,7 @@ #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 #define MARVELL_PHY_ID_88E1540 0x01410eb0 +#define MARVELL_PHY_ID_88E1545 0x01410ea0 #define MARVELL_PHY_ID_88E3016 0x01410e60 /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do -- cgit v1.2.3 From 0f1b92cbdd0309afae0af1963e8cccddb3d2eaff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 30 Jan 2017 18:06:11 +0300 Subject: introduce the walk_process_tree() helper Add the new helper to walk the process tree, the next patch adds a user. Note that it visits the group leaders only, proc_visitor can do for_each_thread itself or we can trivially extend walk_process_tree() to do this. Signed-off-by: Oleg Nesterov Signed-off-by: Pavel Tikhomirov Signed-off-by: Eric W. Biederman --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d2334229167f..6261bfc12853 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3053,6 +3053,9 @@ extern bool current_is_single_threaded(void); #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; -- cgit v1.2.3 From c3485ee0d560b182e1e0f67d67246718739f0782 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 2 Feb 2017 13:48:05 -0600 Subject: tty_port: Add port client functions Introduce a client (upward direction) operations struct for tty_port clients. Initially supported operations are for receiving data and write wake-up. This will allow for having clients other than an ldisc. Convert the calls to the ldisc to use the client ops as the default operations. Signed-off-by: Rob Herring Reviewed-By: Sebastian Reichel Tested-By: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 21c0fabfed60..1017e904c0a3 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -217,12 +217,18 @@ struct tty_port_operations { /* Called on the final put of a port */ void (*destruct)(struct tty_port *port); }; - + +struct tty_port_client_operations { + int (*receive_buf)(struct tty_port *port, const unsigned char *, const unsigned char *, size_t); + void (*write_wakeup)(struct tty_port *port); +}; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ struct tty_struct *itty; /* internal back ptr */ const struct tty_port_operations *ops; /* Port operations */ + const struct tty_port_client_operations *client_ops; /* Port client operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ @@ -241,6 +247,7 @@ struct tty_port { based drain is needed else set to size of fifo */ struct kref kref; /* Ref counter */ + void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ -- cgit v1.2.3 From cd6484e1830be260abfba80a9c7d8f65531126d6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 2 Feb 2017 13:48:07 -0600 Subject: serdev: Introduce new bus for serial attached devices The serdev bus is designed for devices such as Bluetooth, WiFi, GPS and NFC connected to UARTs on host processors. Tradionally these have been handled with tty line disciplines, rfkill, and userspace glue such as hciattach. This approach has many drawbacks since it doesn't fit into the Linux driver model. Handling of sideband signals, power control and firmware loading are the main issues. This creates a serdev bus with controllers (i.e. host serial ports) and attached devices. Typically, these are point to point connections, but some devices have muxing protocols or a h/w mux is conceivable. Any muxing is not yet supported with the serdev bus. Signed-off-by: Rob Herring Reviewed-By: Sebastian Reichel Tested-By: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 241 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 include/linux/serdev.h (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h new file mode 100644 index 000000000000..3ac26c1a8aab --- /dev/null +++ b/include/linux/serdev.h @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2016-2017 Linaro Ltd., Rob Herring + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _LINUX_SERDEV_H +#define _LINUX_SERDEV_H + +#include +#include + +struct serdev_controller; +struct serdev_device; + +/* + * serdev device structures + */ + +/** + * struct serdev_device_ops - Callback operations for a serdev device + * @receive_buf: Function called with data received from device. + * @write_wakeup: Function called when ready to transmit more data. + */ +struct serdev_device_ops { + int (*receive_buf)(struct serdev_device *, const unsigned char *, size_t); + void (*write_wakeup)(struct serdev_device *); +}; + +/** + * struct serdev_device - Basic representation of an serdev device + * @dev: Driver model representation of the device. + * @nr: Device number on serdev bus. + * @ctrl: serdev controller managing this device. + * @ops: Device operations. + */ +struct serdev_device { + struct device dev; + int nr; + struct serdev_controller *ctrl; + const struct serdev_device_ops *ops; +}; + +static inline struct serdev_device *to_serdev_device(struct device *d) +{ + return container_of(d, struct serdev_device, dev); +} + +/** + * struct serdev_device_driver - serdev slave device driver + * @driver: serdev device drivers should initialize name field of this + * structure. + * @probe: binds this driver to a serdev device. + * @remove: unbinds this driver from the serdev device. + */ +struct serdev_device_driver { + struct device_driver driver; + int (*probe)(struct serdev_device *); + void (*remove)(struct serdev_device *); +}; + +static inline struct serdev_device_driver *to_serdev_device_driver(struct device_driver *d) +{ + return container_of(d, struct serdev_device_driver, driver); +} + +/* + * serdev controller structures + */ +struct serdev_controller_ops { + int (*write_buf)(struct serdev_controller *, const unsigned char *, size_t); + void (*write_flush)(struct serdev_controller *); + int (*write_room)(struct serdev_controller *); + int (*open)(struct serdev_controller *); + void (*close)(struct serdev_controller *); + void (*set_flow_control)(struct serdev_controller *, bool); + unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); +}; + +/** + * struct serdev_controller - interface to the serdev controller + * @dev: Driver model representation of the device. + * @nr: number identifier for this controller/bus. + * @serdev: Pointer to slave device for this controller. + * @ops: Controller operations. + */ +struct serdev_controller { + struct device dev; + unsigned int nr; + struct serdev_device *serdev; + const struct serdev_controller_ops *ops; +}; + +static inline struct serdev_controller *to_serdev_controller(struct device *d) +{ + return container_of(d, struct serdev_controller, dev); +} + +static inline void *serdev_device_get_drvdata(const struct serdev_device *serdev) +{ + return dev_get_drvdata(&serdev->dev); +} + +static inline void serdev_device_set_drvdata(struct serdev_device *serdev, void *data) +{ + dev_set_drvdata(&serdev->dev, data); +} + +/** + * serdev_device_put() - decrement serdev device refcount + * @serdev serdev device. + */ +static inline void serdev_device_put(struct serdev_device *serdev) +{ + if (serdev) + put_device(&serdev->dev); +} + +static inline void serdev_device_set_client_ops(struct serdev_device *serdev, + const struct serdev_device_ops *ops) +{ + serdev->ops = ops; +} + +static inline +void *serdev_controller_get_drvdata(const struct serdev_controller *ctrl) +{ + return ctrl ? dev_get_drvdata(&ctrl->dev) : NULL; +} + +static inline void serdev_controller_set_drvdata(struct serdev_controller *ctrl, + void *data) +{ + dev_set_drvdata(&ctrl->dev, data); +} + +/** + * serdev_controller_put() - decrement controller refcount + * @ctrl serdev controller. + */ +static inline void serdev_controller_put(struct serdev_controller *ctrl) +{ + if (ctrl) + put_device(&ctrl->dev); +} + +struct serdev_device *serdev_device_alloc(struct serdev_controller *); +int serdev_device_add(struct serdev_device *); +void serdev_device_remove(struct serdev_device *); + +struct serdev_controller *serdev_controller_alloc(struct device *, size_t); +int serdev_controller_add(struct serdev_controller *); +void serdev_controller_remove(struct serdev_controller *); + +static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) +{ + struct serdev_device *serdev = ctrl->serdev; + + if (!serdev || !serdev->ops->write_wakeup) + return; + + serdev->ops->write_wakeup(ctrl->serdev); +} + +static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, + const unsigned char *data, + size_t count) +{ + struct serdev_device *serdev = ctrl->serdev; + + if (!serdev || !serdev->ops->receive_buf) + return -EINVAL; + + return serdev->ops->receive_buf(ctrl->serdev, data, count); +} + +#if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) + +int serdev_device_open(struct serdev_device *); +void serdev_device_close(struct serdev_device *); +unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); +void serdev_device_set_flow_control(struct serdev_device *, bool); +int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); +void serdev_device_write_flush(struct serdev_device *); +int serdev_device_write_room(struct serdev_device *); + +/* + * serdev device driver functions + */ +int __serdev_device_driver_register(struct serdev_device_driver *, struct module *); +#define serdev_device_driver_register(sdrv) \ + __serdev_device_driver_register(sdrv, THIS_MODULE) + +/** + * serdev_device_driver_unregister() - unregister an serdev client driver + * @sdrv: the driver to unregister + */ +static inline void serdev_device_driver_unregister(struct serdev_device_driver *sdrv) +{ + if (sdrv) + driver_unregister(&sdrv->driver); +} + +#define module_serdev_device_driver(__serdev_device_driver) \ + module_driver(__serdev_device_driver, serdev_device_driver_register, \ + serdev_device_driver_unregister) + +#else + +static inline int serdev_device_open(struct serdev_device *sdev) +{ + return -ENODEV; +} +static inline void serdev_device_close(struct serdev_device *sdev) {} +static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev, unsigned int baudrate) +{ + return 0; +} +static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} +static inline int serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count) +{ + return -ENODEV; +} +static inline void serdev_device_write_flush(struct serdev_device *sdev) {} +static inline int serdev_device_write_room(struct serdev_device *sdev) +{ + return 0; +} + +#define serdev_device_driver_register(x) +#define serdev_device_driver_unregister(x) + +#endif /* CONFIG_SERIAL_DEV_BUS */ + +#endif /*_LINUX_SERDEV_H */ -- cgit v1.2.3 From bed35c6dfa6a36233c3e1238a40dc1ae67955898 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 2 Feb 2017 13:48:08 -0600 Subject: serdev: add a tty port controller driver Add a serdev controller driver for tty ports. The controller is registered with serdev when tty ports are registered with the TTY core. As the TTY core is built-in only, this has the side effect of making serdev built-in as well. Signed-off-by: Rob Herring Reviewed-By: Sebastian Reichel Tested-By: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 3ac26c1a8aab..9519da6253a8 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -238,4 +238,25 @@ static inline int serdev_device_write_room(struct serdev_device *sdev) #endif /* CONFIG_SERIAL_DEV_BUS */ +/* + * serdev hooks into TTY core + */ +struct tty_port; +struct tty_driver; + +#ifdef CONFIG_SERIAL_DEV_CTRL_TTYPORT +struct device *serdev_tty_port_register(struct tty_port *port, + struct device *parent, + struct tty_driver *drv, int idx); +void serdev_tty_port_unregister(struct tty_port *port); +#else +static inline struct device *serdev_tty_port_register(struct tty_port *port, + struct device *parent, + struct tty_driver *drv, int idx) +{ + return ERR_PTR(-ENODEV); +} +static inline void serdev_tty_port_unregister(struct tty_port *port) {} +#endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ + #endif /*_LINUX_SERDEV_H */ -- cgit v1.2.3 From 3bb434cdcc6af3d4e70ba041e6f596e465d11e14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Feb 2017 15:42:57 +0100 Subject: vmw_vmci: switch to pci_irq_alloc_vectors Cleans up the IRQ management code a lot, including removing a lot of state from the per-device structure. Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- include/linux/vmw_vmci_defs.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 1bd31a38c51e..b724ef7005de 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -54,13 +54,6 @@ #define VMCI_IMR_DATAGRAM 0x1 #define VMCI_IMR_NOTIFICATION 0x2 -/* Interrupt type. */ -enum { - VMCI_INTR_TYPE_INTX = 0, - VMCI_INTR_TYPE_MSI = 1, - VMCI_INTR_TYPE_MSIX = 2, -}; - /* Maximum MSI/MSI-X interrupt vectors in the device. */ #define VMCI_MAX_INTRS 2 -- cgit v1.2.3 From d44fa3d46079dc095c1346fa6e5bc96dca1ead41 Mon Sep 17 00:00:00 2001 From: Agustin Vega-Frias Date: Thu, 2 Feb 2017 18:23:58 -0500 Subject: ACPI: Add support for ResourceSource/IRQ domain mapping ACPI extended IRQ resources may contain a ResourceSource to specify an alternate interrupt controller. Introduce acpi_irq_get and use it to implement ResourceSource/IRQ domain mapping. The new API is similar to of_irq_get and allows re-initialization of a platform resource from the ACPI extended IRQ resource, and provides proper behavior for probe deferral when the domain is not yet present when called. Acked-by: Rafael J. Wysocki Acked-by: Lorenzo Pieralisi Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Agustin Vega-Frias Signed-off-by: Marc Zyngier --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5b36974ed60a..8e577c2cb0ce 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1153,4 +1153,14 @@ int parse_spcr(bool earlycon); static inline int parse_spcr(bool earlycon) { return 0; } #endif +#if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI) +int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res); +#else +static inline +int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) +{ + return -EINVAL; +} +#endif + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 3541f9e8bdebce02458882b66b638d7302c1f616 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Feb 2017 08:04:56 -0800 Subject: tcp: add tcp_mss_clamp() helper Small cleanup factorizing code doing the TCP_MAXSEG clamping. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f88f4649ba6f..cfc2d9506ce8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -445,4 +445,13 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp) struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk); +static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) +{ + /* We use READ_ONCE() here because socket might not be locked. + * This happens for listeners. + */ + u16 user_mss = READ_ONCE(tp->rx_opt.user_mss); + + return (user_mss && user_mss < mss) ? user_mss : mss; +} #endif /* _LINUX_TCP_H */ -- cgit v1.2.3 From 71810db27c1c853b335675bee335d893bc3d324b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 09:54:06 +0000 Subject: modversions: treat symbol CRCs as 32 bit quantities The modversion symbol CRCs are emitted as ELF symbols, which allows us to easily populate the kcrctab sections by relying on the linker to associate each kcrctab slot with the correct value. This has a couple of downsides: - Given that the CRCs are treated as memory addresses, we waste 4 bytes for each CRC on 64 bit architectures, - On architectures that support runtime relocation, a R__RELATIVE relocation entry is emitted for each CRC value, which identifies it as a quantity that requires fixing up based on the actual runtime load offset of the kernel. This results in corrupted CRCs unless we explicitly undo the fixup (and this is currently being handled in the core module code) - Such runtime relocation entries take up 24 bytes of __init space each, resulting in a x8 overhead in [uncompressed] kernel size for CRCs. Switching to explicit 32 bit values on 64 bit architectures fixes most of these issues, given that 32 bit values are not treated as quantities that require fixing up based on the actual runtime load offset. Note that on some ELF64 architectures [such as PPC64], these 32-bit values are still emitted as [absolute] runtime relocatable quantities, even if the value resolves to a build time constant. Since relative relocations are always resolved at build time, this patch enables MODULE_REL_CRCS on powerpc when CONFIG_RELOCATABLE=y, which turns the absolute CRC references into relative references into .rodata where the actual CRC value is stored. So redefine all CRC fields and variables as u32, and redefine the __CRC_SYMBOL() macro for 64 bit builds to emit the CRC reference using inline assembler (which is necessary since 64-bit C code cannot use 32-bit types to hold memory addresses, even if they are ultimately resolved using values that do not exceed 0xffffffff). To avoid potential problems with legacy 32-bit architectures using legacy toolchains, the equivalent C definition of the kcrctab entry is retained for 32-bit architectures. Note that this mostly reverts commit d4703aefdbc8 ("module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y") Acked-by: Rusty Russell Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/export.h | 14 ++++++++++++++ include/linux/module.h | 14 +++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 2a0f61fbc731..7473fba6a60c 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -43,6 +43,13 @@ extern struct module __this_module; #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ +#if defined(CONFIG_MODULE_REL_CRCS) +#define __CRC_SYMBOL(sym, sec) \ + asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ + " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " - . \n" \ + " .previous \n"); +#elif !defined(CONFIG_64BIT) #define __CRC_SYMBOL(sym, sec) \ extern __visible void *__crc_##sym __attribute__((weak)); \ static const unsigned long __kcrctab_##sym \ @@ -50,6 +57,13 @@ extern struct module __this_module; __attribute__((section("___kcrctab" sec "+" #sym), used)) \ = (unsigned long) &__crc_##sym; #else +#define __CRC_SYMBOL(sym, sec) \ + asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ + " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ + " .previous \n"); +#endif +#else #define __CRC_SYMBOL(sym, sec) #endif diff --git a/include/linux/module.h b/include/linux/module.h index 7c84273d60b9..cc7cba219b20 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -346,7 +346,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const unsigned long *crcs; + const s32 *crcs; unsigned int num_syms; /* Kernel parameters. */ @@ -359,18 +359,18 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const unsigned long *gpl_crcs; + const s32 *gpl_crcs; #ifdef CONFIG_UNUSED_SYMBOLS /* unused exported symbols. */ const struct kernel_symbol *unused_syms; - const unsigned long *unused_crcs; + const s32 *unused_crcs; unsigned int num_unused_syms; /* GPL-only, unused exported symbols. */ unsigned int num_unused_gpl_syms; const struct kernel_symbol *unused_gpl_syms; - const unsigned long *unused_gpl_crcs; + const s32 *unused_gpl_crcs; #endif #ifdef CONFIG_MODULE_SIG @@ -382,7 +382,7 @@ struct module { /* symbols that will be GPL-only in the near future. */ const struct kernel_symbol *gpl_future_syms; - const unsigned long *gpl_future_crcs; + const s32 *gpl_future_crcs; unsigned int num_gpl_future_syms; /* Exception table */ @@ -523,7 +523,7 @@ struct module *find_module(const char *name); struct symsearch { const struct kernel_symbol *start, *stop; - const unsigned long *crcs; + const s32 *crcs; enum { NOT_GPL_ONLY, GPL_ONLY, @@ -539,7 +539,7 @@ struct symsearch { */ const struct kernel_symbol *find_symbol(const char *name, struct module **owner, - const unsigned long **crc, + const s32 **crc, bool gplok, bool warn); -- cgit v1.2.3 From 4b9eee96fcb361a5e16a8d2619825e8a048f81f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2017 09:54:07 +0000 Subject: module: unify absolute krctab definitions for 32-bit and 64-bit The previous patch introduced a separate inline asm version of the krcrctab declaration template for use with 64-bit architectures, which cannot refer to ELF symbols using 32-bit quantities. This declaration should be equivalent to the C one for 32-bit architectures, but just in case - unify them in a separate patch, which can simply be dropped if it turns out to break anything. Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/export.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 7473fba6a60c..1a1dfdb2a5c6 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -49,13 +49,6 @@ extern struct module __this_module; " .weak " VMLINUX_SYMBOL_STR(__crc_##sym) " \n" \ " .long " VMLINUX_SYMBOL_STR(__crc_##sym) " - . \n" \ " .previous \n"); -#elif !defined(CONFIG_64BIT) -#define __CRC_SYMBOL(sym, sec) \ - extern __visible void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ - __used \ - __attribute__((section("___kcrctab" sec "+" #sym), used)) \ - = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) \ asm(" .section \"___kcrctab" sec "+" #sym "\", \"a\" \n" \ -- cgit v1.2.3 From 29905b52fad0854351f57bab867647e4982285bf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 18:05:26 +0000 Subject: log2: make order_base_2() behave correctly on const input value zero The function order_base_2() is defined (according to the comment block) as returning zero on input zero, but subsequently passes the input into roundup_pow_of_two(), which is explicitly undefined for input zero. This has gone unnoticed until now, but optimization passes in GCC 7 may produce constant folded function instances where a constant value of zero is passed into order_base_2(), resulting in link errors against the deliberately undefined '____ilog2_NaN'. So update order_base_2() to adhere to its own documented interface. [ See http://marc.info/?l=linux-kernel&m=147672952517795&w=2 and follow-up discussion for more background. The gcc "optimization pass" is really just broken, but now the GCC trunk problem seems to have escaped out of just specially built daily images, so we need to work around it in mainline. - Linus ] Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/log2.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index fd7ff3d91e6a..ef3d4f67118c 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -203,6 +203,17 @@ unsigned long __rounddown_pow_of_two(unsigned long n) * ... and so on. */ -#define order_base_2(n) ilog2(roundup_pow_of_two(n)) +static inline __attribute_const__ +int __order_base_2(unsigned long n) +{ + return n > 1 ? ilog2(n - 1) + 1 : 0; +} +#define order_base_2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0 || (n) == 1) ? 0 : \ + ilog2((n) - 1) + 1) : \ + __order_base_2(n) \ +) #endif /* _LINUX_LOG2_H */ -- cgit v1.2.3 From 680a0873e193bae666439f4b5e32c758e68f114c Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 1 Feb 2017 11:28:27 -0600 Subject: arm: kernel: Add SMC structure parameter This patch adds a quirk parameter to the arm_smccc_(smc/hvc) calls. The quirk structure allows for specialized SMC operations due to SoC specific requirements. The current arm_smccc_(smc/hvc) is renamed and macros are used instead to specify the standard arm_smccc_(smc/hvc) or the arm_smccc_(smc/hvc)_quirk function. This patch and partial implementation was suggested by Will Deacon. Signed-off-by: Andy Gross Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index b5abfda80465..c66f8ae94b5a 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -72,33 +72,57 @@ struct arm_smccc_res { }; /** - * arm_smccc_smc() - make SMC calls + * struct arm_smccc_quirk - Contains quirk information + * @id: quirk identification + * @state: quirk specific information + * @a6: Qualcomm quirk entry for returning post-smc call contents of a6 + */ +struct arm_smccc_quirk { + int id; + union { + unsigned long a6; + } state; +}; + +/** + * __arm_smccc_smc() - make SMC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 + * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required. * * This function is used to make SMC calls following SMC Calling Convention. * The content of the supplied param are copied to registers 0 to 7 prior * to the SMC instruction. The return values are updated with the content - * from register 0 to 3 on return from the SMC instruction. + * from register 0 to 3 on return from the SMC instruction. An optional + * quirk structure provides vendor specific behavior. */ -asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1, +asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, - struct arm_smccc_res *res); + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); /** - * arm_smccc_hvc() - make HVC calls + * __arm_smccc_hvc() - make HVC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 * * This function is used to make HVC calls following SMC Calling * Convention. The content of the supplied param are copied to registers 0 * to 7 prior to the HVC instruction. The return values are updated with - * the content from register 0 to 3 on return from the HVC instruction. + * the content from register 0 to 3 on return from the HVC instruction. An + * optional quirk structure provides vendor specific behavior. */ -asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1, +asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, - struct arm_smccc_res *res); + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); + +#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL) + +#define arm_smccc_smc_quirk(...) __arm_smccc_smc(__VA_ARGS__) + +#define arm_smccc_hvc(...) __arm_smccc_hvc(__VA_ARGS__, NULL) + +#define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) #endif /*__LINUX_ARM_SMCCC_H*/ -- cgit v1.2.3 From 82bcd087029f6056506ea929f11af02622230901 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 1 Feb 2017 11:28:28 -0600 Subject: firmware: qcom: scm: Fix interrupted SCM calls This patch adds a Qualcomm specific quirk to the arm_smccc_smc call. On Qualcomm ARM64 platforms, the SMC call can return before it has completed. If this occurs, the call can be restarted, but it requires using the returned session ID value from the interrupted SMC call. The quirk stores off the session ID from the interrupted call in the quirk structure so that it can be used by the caller. This patch folds in a fix given by Sricharan R: https://lkml.org/lkml/2016/9/28/272 Signed-off-by: Andy Gross Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c66f8ae94b5a..b67934164401 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -14,9 +14,6 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H -#include -#include - /* * This file provides common defines for ARM SMC Calling Convention as * specified in @@ -60,6 +57,13 @@ #define ARM_SMCCC_OWNER_TRUSTED_OS 50 #define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 +#define ARM_SMCCC_QUIRK_NONE 0 +#define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ + +#ifndef __ASSEMBLY__ + +#include +#include /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -125,4 +129,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) +#endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ -- cgit v1.2.3 From b3c7ef0adadc5768e0baa786213c6bd1ce521a77 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:53 -0800 Subject: bridge: uapi: add per vlan tunnel info New nested netlink attribute to associate tunnel info per vlan. This is used by bridge driver to send tunnel metadata to bridge ports in vlan tunnel mode. This patch also adds new per port flag IFLA_BRPORT_VLAN_TUNNEL to enable vlan tunnel mode. off by default. One example use for this is a vxlan bridging gateway or vtep which maps vlans to vn-segments (or vnis). User can configure per-vlan tunnel information which the bridge driver can use to bridge vlan into the corresponding vn-segment. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index debc9d5904e5..c5847dc75a93 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -47,6 +47,7 @@ struct br_ip_list { #define BR_PROXYARP_WIFI BIT(10) #define BR_MCAST_FLOOD BIT(11) #define BR_MULTICAST_TO_UNICAST BIT(12) +#define BR_VLAN_TUNNEL BIT(13) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From 3898fac1f488c76e0eef5b5267b4ba8112a82ac4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 2 Feb 2017 17:09:54 +0100 Subject: trace: rename trace_print_hex_seq arg and add kdoc Steven suggested to improve trace_print_hex_seq() a bit after commit 2acae0d5b0f7 ("trace: add variant without spacing in trace_print_hex_seq") in two ways: i) by adding a kdoc comment for the helper function itself and ii) by renaming 'spacing' argument into 'concatenate' to better denote that we don't add spaces between each hex bytes. Suggested-by: Steven Rostedt Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index cfa475a0e9ca..0f165507495c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -34,7 +34,7 @@ const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, const char *trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len, - bool spacing); + bool concatenate); const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, -- cgit v1.2.3 From b862815c3ee7b49ec20a9ab25da55a5f0bcbb95e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Feb 2017 10:29:05 +0100 Subject: list: introduce list_for_each_entry_from_reverse helper Similar to list_for_each_entry_continue and its reverse variant list_for_each_entry_continue_reverse, introduce reverse helper for list_for_each_entry_from. Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/list.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index d1039ecaf94f..ae537fa46216 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -526,6 +526,19 @@ static inline void list_splice_tail_init(struct list_head *list, for (; &pos->member != (head); \ pos = list_next_entry(pos, member)) +/** + * list_for_each_entry_from_reverse - iterate backwards over list of given type + * from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, continuing from current position. + */ +#define list_for_each_entry_from_reverse(pos, head, member) \ + for (; &pos->member != (head); \ + pos = list_prev_entry(pos, member)) + /** * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop cursor. -- cgit v1.2.3 From 44091d29f2075972aede47ef17e1e70db3d51190 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Feb 2017 10:29:06 +0100 Subject: lib: Introduce priority array area manager This introduces a infrastructure for management of linear priority areas. Priority order in an array matters, however order of items inside a priority group does not matter. As an initial implementation, L-sort algorithm is used. It is quite trivial. More advanced algorithm called P-sort will be introduced as a follow-up. The infrastructure is prepared for other algos. Alongside this, a testing module is introduced as well. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/parman.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 include/linux/parman.h (limited to 'include/linux') diff --git a/include/linux/parman.h b/include/linux/parman.h new file mode 100644 index 000000000000..3c8cccc7d4da --- /dev/null +++ b/include/linux/parman.h @@ -0,0 +1,76 @@ +/* + * include/linux/parman.h - Manager for linear priority array areas + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARMAN_H +#define _PARMAN_H + +#include + +enum parman_algo_type { + PARMAN_ALGO_TYPE_LSORT, +}; + +struct parman_item { + struct list_head list; + unsigned long index; +}; + +struct parman_prio { + struct list_head list; + struct list_head item_list; + unsigned long priority; +}; + +struct parman_ops { + unsigned long base_count; + unsigned long resize_step; + int (*resize)(void *priv, unsigned long new_count); + void (*move)(void *priv, unsigned long from_index, + unsigned long to_index, unsigned long count); + enum parman_algo_type algo; +}; + +struct parman; + +struct parman *parman_create(const struct parman_ops *ops, void *priv); +void parman_destroy(struct parman *parman); +void parman_prio_init(struct parman *parman, struct parman_prio *prio, + unsigned long priority); +void parman_prio_fini(struct parman_prio *prio); +int parman_item_add(struct parman *parman, struct parman_prio *prio, + struct parman_item *item); +void parman_item_remove(struct parman *parman, struct parman_prio *prio, + struct parman_item *item); + +#endif -- cgit v1.2.3 From a96dfddbcc04336bbed50dc2b24823e45e09e80c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Fri, 3 Feb 2017 13:13:23 -0800 Subject: base/memory, hotplug: fix a kernel oops in show_valid_zones() Reading a sysfs "memoryN/valid_zones" file leads to the following oops when the first page of a range is not backed by struct page. show_valid_zones() assumes that 'start_pfn' is always valid for page_zone(). BUG: unable to handle kernel paging request at ffffea017a000000 IP: show_valid_zones+0x6f/0x160 This issue may happen on x86-64 systems with 64GiB or more memory since their memory block size is bumped up to 2GiB. [1] An example of such systems is desribed below. 0x3240000000 is only aligned by 1GiB and this memory block starts from 0x3200000000, which is not backed by struct page. BIOS-e820: [mem 0x0000003240000000-0x000000603fffffff] usable Since test_pages_in_a_zone() already checks holes, fix this issue by extending this function to return 'valid_start' and 'valid_end' for a given range. show_valid_zones() then proceeds with the valid range. [1] 'Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory x86-64 systems")' Link: http://lkml.kernel.org/r/20170127222149.30893-3-toshi.kani@hpe.com Signed-off-by: Toshi Kani Cc: Greg Kroah-Hartman Cc: Zhang Zhen Cc: Reza Arbab Cc: David Rientjes Cc: Dan Williams Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c1784c0b4f35..134a2f69c21a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -85,7 +85,8 @@ extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); /* VM interface that may be used by firmware interface */ extern int online_pages(unsigned long, unsigned long, int); -extern int test_pages_in_a_zone(unsigned long, unsigned long); +extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn, + unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); typedef void (*online_page_callback_t)(struct page *page); -- cgit v1.2.3 From 79e7fff47b7bb4124ef970a13eac4fdeddd1fc25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Feb 2017 18:43:28 -0800 Subject: net: remove support for per driver ndo_busy_poll() We added generic support for busy polling in NAPI layer in linux-4.5 No network driver uses ndo_busy_poll() anymore, we can get rid of the pointer in struct net_device_ops, and its use in sk_busy_loop() Saves NETIF_F_BUSY_POLL features bit. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 -- include/linux/netdevice.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9c6c8ef2e9e7..9a0419594e84 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -71,7 +71,6 @@ enum { NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ - NETIF_F_BUSY_POLL_BIT, /* Busy poll */ NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ @@ -134,7 +133,6 @@ enum { #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) -#define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) #define NETIF_F_HW_TC __NETIF_F(HW_TC) #define for_each_netdev_feature(mask_addr, bit) \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f3878fbe7786..6f18b509fb2f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1184,9 +1184,6 @@ struct net_device_ops { int (*ndo_netpoll_setup)(struct net_device *dev, struct netpoll_info *info); void (*ndo_netpoll_cleanup)(struct net_device *dev); -#endif -#ifdef CONFIG_NET_RX_BUSY_POLL - int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); -- cgit v1.2.3 From f9f41e3ef99ac9d4e91b07634362e393fb929aad Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Jan 2017 10:17:27 +0530 Subject: cpufreq: Remove policy create/remove notifiers Those were added by: commit fcd7af917abb ("cpufreq: stats: handle cpufreq_unregister_driver() and suspend/resume properly") but aren't used anymore since: commit 1aefc75b2449 ("cpufreq: stats: Make the stats code non-modular"). Remove them. Also remove the redundant parameter to the respective routines. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7e05c5e4e45c..a597bb825ae1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -416,8 +416,6 @@ static inline void cpufreq_resume(void) {} #define CPUFREQ_ADJUST (0) #define CPUFREQ_NOTIFY (1) #define CPUFREQ_START (2) -#define CPUFREQ_CREATE_POLICY (3) -#define CPUFREQ_REMOVE_POLICY (4) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); -- cgit v1.2.3 From 052f573f5cca0ce0a16de409012660565bd792df Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Jan 2017 11:34:31 +0530 Subject: cpufreq: Remove CPUFREQ_START notifier event Its not used anymore, remove it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a597bb825ae1..b07838b1fc60 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -415,7 +415,6 @@ static inline void cpufreq_resume(void) {} /* Policy Notifiers */ #define CPUFREQ_ADJUST (0) #define CPUFREQ_NOTIFY (1) -#define CPUFREQ_START (2) #ifdef CONFIG_CPU_FREQ int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list); -- cgit v1.2.3 From 565ebe8073f84ced436a18e76a5ba8e6bb73dfb3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 3 Feb 2017 15:26:25 +0530 Subject: cpufreq: Fix typos in comments - s/freqnency/frequency/ - s/accomodating/accommodating/ Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b07838b1fc60..87165f06a307 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -31,7 +31,7 @@ #define CPUFREQ_ETERNAL (-1) #define CPUFREQ_NAME_LEN 16 -/* Print length for names. Extra 1 space for accomodating '\n' in prints */ +/* Print length for names. Extra 1 space for accommodating '\n' in prints */ #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) struct cpufreq_governor; @@ -115,7 +115,7 @@ struct cpufreq_policy { * guarantee that frequency can be changed on any CPU sharing the * policy and that the change will affect all of the policy CPUs then. * - fast_switch_enabled is to be set by governors that support fast - * freqnency switching with the help of cpufreq_enable_fast_switch(). + * frequency switching with the help of cpufreq_enable_fast_switch(). */ bool fast_switch_possible; bool fast_switch_enabled; -- cgit v1.2.3 From 668802c25729a8e3423015c33c05f1c3be3858e9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 30 Jan 2017 12:57:43 -0500 Subject: tick/broadcast: Reduce lock cacheline contention It was observed that on an Intel x86 system without the ARAT (Always running APIC timer) feature and with fairly large number of CPUs as well as CPUs coming in and out of intel_idle frequently, the lock contention on the tick_broadcast_lock can become significant. To reduce contention, the lock is put into its own cacheline and all the cpumask_var_t variables are put into the __read_mostly section. Running the SP benchmark of the NAS Parallel Benchmarks on a 4-socket 16-core 32-thread Nehalam system, the performance number improved from 3353.94 Mop/s to 3469.31 Mop/s when this patch was applied on a 4.9.6 kernel. This is a 3.4% improvement. Signed-off-by: Waiman Long Cc: "Peter Zijlstra (Intel)" Cc: Andrew Morton Link: http://lkml.kernel.org/r/1485799063-20857-1-git-send-email-longman@redhat.com Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c717f5ea88cb..23c1a6d09ec5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -649,11 +649,15 @@ static inline size_t cpumask_size(void) * used. Please use this_cpu_cpumask_var_t in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. + * + * Please also note that __cpumask_var_read_mostly can be used to declare + * a cpumask_var_t variable itself (not its content) as read mostly. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; -#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); @@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) +#define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { -- cgit v1.2.3 From 4b0947974e593d52aace18ca5c7e2746fdebae60 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 Feb 2017 14:53:10 +0100 Subject: gpio: Rename devm_get_gpiod_from_child() Rename devm_get_gpiod_from_child() into devm_fwnode_get_gpiod_from_child() to reflect the fact that this function is operating on a fwnode object. Signed-off-by: Boris Brezillon Acked-by: Jacek Anaszewski Acked-by: Dmitry Torokhov Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 80bad7ebde04..f32f49e96c0b 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -138,11 +138,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname, enum gpiod_flags dflags, const char *label); -struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label); +struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, + const char *con_id, + struct fwnode_handle *child, + enum gpiod_flags flags, + const char *label); #else /* CONFIG_GPIOLIB */ static inline int gpiod_count(struct device *dev, const char *con_id) @@ -425,11 +425,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, } static inline -struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label) +struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, + const char *con_id, + struct fwnode_handle *child, + enum gpiod_flags flags, + const char *label) { return ERR_PTR(-ENOSYS); } -- cgit v1.2.3 From 537b94dafce29af6a3e923d216472cfc2f3659af Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 2 Feb 2017 14:53:11 +0100 Subject: gpio: Add the devm_fwnode_get_index_gpiod_from_child() helper devm_fwnode_get_gpiod_from_child() currently allows GPIO users to request a GPIO that is defined in a child fwnode instead of directly in the device fwnode. Extend this API by adding the devm_fwnode_get_index_gpiod_from_child() helper which does the same except you can also specify an index in case the 'xx-gpios' property describe several GPIOs. Signed-off-by: Boris Brezillon Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index f32f49e96c0b..ea9b01d40017 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -135,14 +135,14 @@ int desc_to_gpio(const struct gpio_desc *desc); struct fwnode_handle; struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, + const char *propname, int index, enum gpiod_flags dflags, const char *label); -struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, - const char *con_id, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label); +struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, + const char *con_id, int index, + struct fwnode_handle *child, + enum gpiod_flags flags, + const char *label); #else /* CONFIG_GPIOLIB */ static inline int gpiod_count(struct device *dev, const char *con_id) @@ -417,13 +417,25 @@ struct fwnode_handle; static inline struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, + const char *propname, int index, enum gpiod_flags dflags, const char *label) { return ERR_PTR(-ENOSYS); } +static inline +struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, + const char *con_id, int index, + struct fwnode_handle *child, + enum gpiod_flags flags, + const char *label) +{ + return ERR_PTR(-ENOSYS); +} + +#endif /* CONFIG_GPIOLIB */ + static inline struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, const char *con_id, @@ -431,11 +443,10 @@ struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, enum gpiod_flags flags, const char *label) { - return ERR_PTR(-ENOSYS); + return devm_fwnode_get_index_gpiod_from_child(dev, con_id, 0, child, + flags, label); } -#endif /* CONFIG_GPIOLIB */ - #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS) int gpiod_export(struct gpio_desc *desc, bool direction_may_change); -- cgit v1.2.3 From 02c1602ee7b3e3d062c3eacd374d6a6e3a2ebb73 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Feb 2017 15:25:02 -0800 Subject: net: remove __napi_complete() All __napi_complete() callers have been converted to use the more standard napi_complete_done(), we can now remove this NAPI method for good. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f18b509fb2f..014fbe256d55 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -463,7 +463,6 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } -bool __napi_complete(struct napi_struct *n); bool napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete -- cgit v1.2.3 From 656441478ed55d960df5f3ccdf5a0f8c61dfd0b3 Mon Sep 17 00:00:00 2001 From: Mark Marshall Date: Thu, 26 Jan 2017 16:18:27 +0100 Subject: mtd: nand: ifc: Fix location of eccstat registers for IFC V1.0 The commit 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") added support for version 2.0 of the IFC controller. The version 2.0 controller has the ECC status registers at a different location to the previous versions. Correct the fsl_ifc_nand structure so that the ECC status can be read from the correct location for both version 1.0 and 2.0 of the controller. Cc: stable@vger.kernel.org Fixes: 7a654172161c ("mtd/ifc: Add support for IFC controller version 2.0") Signed-off-by: Mark Marshall Signed-off-by: Boris Brezillon --- include/linux/fsl_ifc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index 3f9778cbc79d..c332f0a45607 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -733,8 +733,12 @@ struct fsl_ifc_nand { __be32 nand_erattr1; u32 res19[0x10]; __be32 nand_fsr; - u32 res20[0x3]; - __be32 nand_eccstat[6]; + u32 res20; + /* The V1 nand_eccstat is actually 4 words that overlaps the + * V2 nand_eccstat. + */ + __be32 v1_nand_eccstat[2]; + __be32 v2_nand_eccstat[6]; u32 res21[0x1c]; __be32 nanndcr; u32 res22[0x2]; -- cgit v1.2.3 From a61d5ce9cc56e2e41bbb1ad62ca7a16d7e7567bd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 8 Dec 2016 12:58:45 +0200 Subject: net/mlx5: Fix static checker warnings For some reason, sparse doesn't like using an expression of type (!x) with a bitwise | and &. In order to mitigate that, we use a local variable. This removes the following sparse complaints on the core driver (and similar ones on the IB driver too): drivers/net/ethernet/mellanox/mlx5/core/srq.c:83:9: warning: dubious: !x & y drivers/net/ethernet/mellanox/mlx5/core/srq.c:96:9: warning: dubious: !x & y drivers/net/ethernet/mellanox/mlx5/core/port.c:59:9: warning: dubious: !x & y drivers/net/ethernet/mellanox/mlx5/core/vport.c:561:9: warning: dubious: !x & y Signed-off-by: Or Gerlitz Signed-off-by: Matan Barak Reported-by: Bart Van Assche Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 7b6cd67a263f..dd9a263ed368 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -67,10 +67,11 @@ /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ + u32 _v = v; \ BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ cpu_to_be32((be32_to_cpu(*((__be32 *)(p) + __mlx5_dw_off(typ, fld))) & \ - (~__mlx5_dw_mask(typ, fld))) | (((v) & __mlx5_mask(typ, fld)) \ + (~__mlx5_dw_mask(typ, fld))) | (((_v) & __mlx5_mask(typ, fld)) \ << __mlx5_dw_bit_off(typ, fld))); \ } while (0) -- cgit v1.2.3 From a4077ce5871304f8a78f80b74b18b6052a410f1a Mon Sep 17 00:00:00 2001 From: "Andrey Jr. Melnikov" Date: Thu, 8 Dec 2016 19:57:08 +0300 Subject: mtd: nand: Add Winbond manufacturer id Add WINBOND manufacturer id. Signed-off-by: Andrey Jr. Melnikov Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f67915c7726f..f6017a1a35d6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -959,6 +959,7 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv) #define NAND_MFR_SANDISK 0x45 #define NAND_MFR_INTEL 0x89 #define NAND_MFR_ATO 0x9b +#define NAND_MFR_WINBOND 0xef /* The maximum expected count of bytes in the NAND ID sequence */ #define NAND_MAX_ID_LEN 8 -- cgit v1.2.3 From a1831bb9403720db6d4c033fe2d6bd0116dd28fe Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 1 Feb 2017 17:53:04 +0000 Subject: iommu/dma: Remove bogus dma_supported() implementation Back when this was first written, dma_supported() was somewhat of a murky mess, with subtly different interpretations being relied upon in various places. The "does device X support DMA to address range Y?" uses assuming Y to be physical addresses, which motivated the current iommu_dma_supported() implementation and are alluded to in the comment therein, have since been cleaned up, leaving only the far less ambiguous "can device X drive address bits Y" usage internal to DMA API mask setting. As such, there is no reason to keep a slightly misleading callback which does nothing but duplicate the current default behaviour; we already constrain IOVA allocations to the iommu_domain aperture where necessary, so let's leave DMA mask business to architecture-specific code where it belongs. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 3a846f9ec0fd..5725c94b1f12 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -67,7 +67,6 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs); void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs); -int iommu_dma_supported(struct device *dev, u64 mask); int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ -- cgit v1.2.3 From d254586c34538c0014280806c5d4795697cf21e5 Mon Sep 17 00:00:00 2001 From: David Jander Date: Fri, 10 Oct 2014 17:30:10 +0200 Subject: can: rx-offload: Add support for HW fifo based irq offloading Some CAN controllers have a usable FIFO already but can still benefit from off-loading the CAN controller FIFO. The CAN frames of the FIFO are read and put into a skb queue during interrupt and then transmitted in a NAPI context. Signed-off-by: David Jander Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/linux/can/rx-offload.h (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h new file mode 100644 index 000000000000..cadfe9869600 --- /dev/null +++ b/include/linux/can/rx-offload.h @@ -0,0 +1,51 @@ +/* + * linux/can/rx-offload.h + * + * Copyright (c) 2014 David Jander, Protonic Holland + * Copyright (c) 2014-2017 Pengutronix, Marc Kleine-Budde + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CAN_RX_OFFLOAD_H +#define _CAN_RX_OFFLOAD_H + +#include +#include + +struct can_rx_offload { + struct net_device *dev; + + unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, unsigned int mb); + + struct sk_buff_head skb_queue; + u32 skb_queue_len_max; + + struct napi_struct napi; +}; + +int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); +int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +void can_rx_offload_reset(struct can_rx_offload *offload); +void can_rx_offload_del(struct can_rx_offload *offload); +void can_rx_offload_enable(struct can_rx_offload *offload); + +static inline void can_rx_offload_schedule(struct can_rx_offload *offload) +{ + napi_schedule(&offload->napi); +} + +static inline void can_rx_offload_disable(struct can_rx_offload *offload) +{ + napi_disable(&offload->napi); +} + +#endif /* !_CAN_RX_OFFLOAD_H */ -- cgit v1.2.3 From 3abbac0b5dd3e3b3dfb30a4885cdde5c20e1cb83 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 23 Sep 2014 15:28:21 +0200 Subject: can: rx-offload: Add support for timestamp based irq offloading Some CAN controllers don't implement a FIFO in hardware, but fill their mailboxes in a particular order (from lowest to highest or highest to lowest). This makes problems to read the frames in the correct order from the hardware, as new frames might be filled into just read (low) mailboxes. This gets worse, when following new frames are received into not read (higher) mailboxes. On the bright side some these CAN controllers put a timestamp on each received CAN frame. This patch adds support to offload CAN frames in interrupt context, order them by timestamp and then transmitted in a NAPI context. Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index cadfe9869600..cb31683bbe15 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -23,15 +23,23 @@ struct can_rx_offload { struct net_device *dev; - unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, unsigned int mb); + unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf, + u32 *timestamp, unsigned int mb); struct sk_buff_head skb_queue; u32 skb_queue_len_max; + unsigned int mb_first; + unsigned int mb_last; + struct napi_struct napi; + + bool inc; }; +int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload); int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); +int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); -- cgit v1.2.3 From f32f5bd2eb7e91a5090c06bbe1ed14bffbbda5d3 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 19 Nov 2015 17:12:26 +0200 Subject: net/mlx5: Configure cache line size for start and end padding There is a hardware feature that will pad the start or end of a DMA to be cache line aligned to avoid RMWs on the last cache line. The default cache line size setting for this feature is 64B. This change configures the hardware to use 128B alignment on systems with 128B cache lines. In addition we lower bound MPWRQ stride by HCA cacheline in mlx5e, MPWRQ stride should be at least the HCA cacheline, the current default is 64B and in case HCA_CAP.cach_line_128byte capability is set, MPWRQ RX stride will automatically be aligned to 128B. Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a919dfb920ae..cc8ae860cd45 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -804,10 +804,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_150[0xa]; u8 log_max_ra_res_qp[0x6]; - u8 pad_cap[0x1]; + u8 end_pad[0x1]; u8 cc_query_allowed[0x1]; u8 cc_modify_allowed[0x1]; - u8 reserved_at_163[0xd]; + u8 start_pad[0x1]; + u8 cache_line_128byte[0x1]; + u8 reserved_at_163[0xb]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; -- cgit v1.2.3 From 2b31f7ae5f645edd852addfca445895b5806f3f9 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 28 Nov 2016 18:04:50 +0200 Subject: net/mlx5: TX WQE update Add new TX WQE fields for Connect-X5 vlan insertion support, type and vlan_tci, when type = MLX5_ETH_WQE_INSERT_VLAN the HW will insert the vlan and prio fields (vlan_tci) to the packet. Those bits and the inline header fields are mutually exclusive, and valid only when: MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_NOT_REQUIRED and MLX5_CAP_ETH(mdev, wqe_vlan_insert), who will be set in ConnectX-5 and later HW generations. Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- include/linux/mlx5/mlx5_ifc.h | 3 ++- include/linux/mlx5/qp.h | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cc8ae860cd45..afcd4736d8df 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -577,7 +577,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_at_5[0x3]; + u8 reserved_at_5[0x2]; + u8 wqe_vlan_insert[0x1]; u8 self_lb_en_modifiable[0x1]; u8 reserved_at_9[0x2]; u8 max_lso_cap[0x5]; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 219c699c17b7..3096370fe831 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -221,14 +221,26 @@ enum { MLX5_ETH_WQE_L4_CSUM = 1 << 7, }; +enum { + MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, +}; + struct mlx5_wqe_eth_seg { u8 rsvd0[4]; u8 cs_flags; u8 rsvd1; __be16 mss; __be32 rsvd2; - __be16 inline_hdr_sz; - u8 inline_hdr_start[2]; + union { + struct { + __be16 sz; + u8 start[2]; + } inline_hdr; + struct { + __be16 type; + __be16 vlan_tci; + } insert; + }; }; struct mlx5_wqe_xrc_seg { -- cgit v1.2.3 From a8eca326151ee1beac82a4fd86d9edad3a37aaed Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 6 Feb 2017 16:20:14 +0100 Subject: net: remove ndo_neigh_{construct, destroy} from stacked devices In commit 18bfb924f000 ("net: introduce default neigh_construct/destroy ndo calls for L2 upper devices") we added these ndos to stacked devices such as team and bond, so that calls will be propagated to mlxsw. However, previous commit removed the reliance on these ndos and no new users of these ndos have appeared since above mentioned commit. We can therefore safely remove this dead code. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 014fbe256d55..58afbd1cc659 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3888,10 +3888,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); -int netdev_default_l2upper_neigh_construct(struct net_device *dev, - struct neighbour *n); -void netdev_default_l2upper_neigh_destroy(struct net_device *dev, - struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -- cgit v1.2.3 From 455a7b238cd6bc68c4a550cbbd37c1e22b64f71c Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Fri, 3 Feb 2017 12:50:31 -0700 Subject: block: Add Sed-opal library This patch implements the necessary logic to bring an Opal enabled drive out of a factory-enabled into a working Opal state. This patch set also enables logic to save a password to be replayed during a resume from suspend. Signed-off-by: Scott Bauer Signed-off-by: Rafael Antognolli Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 include/linux/sed-opal.h (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h new file mode 100644 index 000000000000..af1a85eae193 --- /dev/null +++ b/include/linux/sed-opal.h @@ -0,0 +1,178 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Authors: + * Rafael Antognolli + * Scott Bauer + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef LINUX_OPAL_H +#define LINUX_OPAL_H + +#include +#include + +/* + * These constant values come from: + * SPC-4 section + * 6.30 SECURITY PROTOCOL IN command / table 265. + */ +enum { + TCG_SECP_00 = 0, + TCG_SECP_01, +}; +struct opal_dev; + +#define IO_BUFFER_LENGTH 2048 +#define MAX_TOKS 64 + +typedef int (*opal_step)(struct opal_dev *dev); +typedef int (sec_send_recv)(struct opal_dev *ctx, u16 spsp, u8 secp, + void *buffer, size_t len, bool send); + + +enum opal_atom_width { + OPAL_WIDTH_TINY, + OPAL_WIDTH_SHORT, + OPAL_WIDTH_MEDIUM, + OPAL_WIDTH_LONG, + OPAL_WIDTH_TOKEN +}; + +/* + * Token defs derived from: + * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 + * 3.2.2 Data Stream Encoding + */ +enum opal_response_token { + OPAL_DTA_TOKENID_BYTESTRING = 0xe0, + OPAL_DTA_TOKENID_SINT = 0xe1, + OPAL_DTA_TOKENID_UINT = 0xe2, + OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */ + OPAL_DTA_TOKENID_INVALID = 0X0 +}; + +/* + * On the parsed response, we don't store again the toks that are already + * stored in the response buffer. Instead, for each token, we just store a + * pointer to the position in the buffer where the token starts, and the size + * of the token in bytes. + */ +struct opal_resp_tok { + const u8 *pos; + size_t len; + enum opal_response_token type; + enum opal_atom_width width; + union { + u64 u; + s64 s; + } stored; +}; + +/* + * From the response header it's not possible to know how many tokens there are + * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later + * if we start dealing with messages that have more than that, we can increase + * this number. This is done to avoid having to make two passes through the + * response, the first one counting how many tokens we have and the second one + * actually storing the positions. + */ +struct parsed_resp { + int num; + struct opal_resp_tok toks[MAX_TOKS]; +}; + +/** + * struct opal_dev - The structure representing a OPAL enabled SED. + * @supported: Whether or not OPAL is supported on this controller. + * @send_recv: The combined sec_send/sec_recv function pointer. + * @opal_step: A series of opal methods that are necessary to complete a command. + * @func_data: An array of parameters for the opal methods above. + * @state: Describes the current opal_step we're working on. + * @dev_lock: Locks the entire opal_dev structure. + * @parsed: Parsed response from controller. + * @prev_data: Data returned from a method to the controller. + * @unlk_lst: A list of Locking ranges to unlock on this device during a resume. + */ +struct opal_dev { + bool initialized; + bool supported; + sec_send_recv *send_recv; + + const opal_step *funcs; + void **func_data; + int state; + struct mutex dev_lock; + u16 comid; + u32 hsn; + u32 tsn; + u64 align; + u64 lowest_lba; + + size_t pos; + u8 cmd[IO_BUFFER_LENGTH]; + u8 resp[IO_BUFFER_LENGTH]; + + struct parsed_resp parsed; + size_t prev_d_len; + void *prev_data; + + struct list_head unlk_lst; +}; + +#ifdef CONFIG_BLK_SED_OPAL +bool opal_unlock_from_suspend(struct opal_dev *dev); +void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv); +int sed_ioctl(struct opal_dev *dev, unsigned int cmd, unsigned long ptr); + +static inline bool is_sed_ioctl(unsigned int cmd) +{ + switch (cmd) { + case IOC_OPAL_SAVE: + case IOC_OPAL_LOCK_UNLOCK: + case IOC_OPAL_TAKE_OWNERSHIP: + case IOC_OPAL_ACTIVATE_LSP: + case IOC_OPAL_SET_PW: + case IOC_OPAL_ACTIVATE_USR: + case IOC_OPAL_REVERT_TPR: + case IOC_OPAL_LR_SETUP: + case IOC_OPAL_ADD_USR_TO_LR: + case IOC_OPAL_ENABLE_DISABLE_MBR: + case IOC_OPAL_ERASE_LR: + case IOC_OPAL_SECURE_ERASE_LR: + return true; + } + return false; +} +#else +static inline bool is_sed_ioctl(unsigned int cmd) +{ + return false; +} + +static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd, + unsigned long ptr) +{ + return 0; +} +static inline bool opal_unlock_from_suspend(struct opal_dev *dev) +{ + return false; +} +static inline void init_opal_dev(struct opal_dev *opal_dev, + sec_send_recv *send_recv) +{ + opal_dev->supported = false; + opal_dev->initialized = true; +} +#endif /* CONFIG_BLK_SED_OPAL */ +#endif /* LINUX_OPAL_H */ -- cgit v1.2.3 From 2aad40d911eeb7dcac91c669f2762a28134f0eb1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 27 Jan 2017 03:12:57 -0800 Subject: remoteproc: Move qcom_mdt_loader into drivers/soc/qcom With the remoteproc parts cleaned out of the MDT loader we can move it to drivers/soc/qcom. Acked-by: Andy Gross Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/mdt_loader.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/soc/qcom/mdt_loader.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h new file mode 100644 index 000000000000..f423001db3a9 --- /dev/null +++ b/include/linux/soc/qcom/mdt_loader.h @@ -0,0 +1,18 @@ +#ifndef __QCOM_MDT_LOADER_H__ +#define __QCOM_MDT_LOADER_H__ + +#include + +#define QCOM_MDT_TYPE_MASK (7 << 24) +#define QCOM_MDT_TYPE_HASH (2 << 24) +#define QCOM_MDT_RELOCATABLE BIT(27) + +struct device; +struct firmware; + +ssize_t qcom_mdt_get_size(const struct firmware *fw); +int qcom_mdt_load(struct device *dev, const struct firmware *fw, + const char *fw_name, int pas_id, void *mem_region, + phys_addr_t mem_phys, size_t mem_size); + +#endif -- cgit v1.2.3 From 4d29b2e5ad37a50b186f828d73086cdbf36580bf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Feb 2017 01:30:49 +0100 Subject: PM / core: Update kerneldoc comments in pm.h Refresh the struct dev_pm_ops kerneldoc comment, so that it looks better and is more readable after processing by Sphinx, and drop the kerneldoc marker from a few other comments ("PM_EVENT_ messages" and a couple of enum types declarations) which are not proper kerneldoc and generally confuse Sphinx. Also change the comment describing struct dev_pm_domain into a kerneldoc one. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jonathan Corbet --- include/linux/pm.h | 113 +++++++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index f926af41e122..10867b11d503 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -64,24 +64,7 @@ typedef struct pm_message { } pm_message_t; /** - * struct dev_pm_ops - device PM callbacks - * - * Several device power state transitions are externally visible, affecting - * the state of pending I/O queues and (for drivers that touch hardware) - * interrupts, wakeups, DMA, and other hardware state. There may also be - * internal transitions to various low-power modes which are transparent - * to the rest of the driver stack (such as a driver that's ON gating off - * clocks which are not in active use). - * - * The externally visible transitions are handled with the help of callbacks - * included in this structure in such a way that two levels of callbacks are - * involved. First, the PM core executes callbacks provided by PM domains, - * device types, classes and bus types. They are the subsystem-level callbacks - * supposed to execute callbacks provided by device drivers, although they may - * choose not to do that. If the driver callbacks are executed, they have to - * collaborate with the subsystem-level callbacks to achieve the goals - * appropriate for the given system transition, given transition phase and the - * subsystem the device belongs to. + * struct dev_pm_ops - device PM callbacks. * * @prepare: The principal role of this callback is to prevent new children of * the device from being registered after it has returned (the driver's @@ -240,34 +223,6 @@ typedef struct pm_message { * driver's interrupt handler, which is guaranteed not to run while * @restore_noirq() is being executed. Analogous to @resume_noirq(). * - * All of the above callbacks, except for @complete(), return error codes. - * However, the error codes returned by the resume operations, @resume(), - * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do - * not cause the PM core to abort the resume transition during which they are - * returned. The error codes returned in those cases are only printed by the PM - * core to the system logs for debugging purposes. Still, it is recommended - * that drivers only return error codes from their resume methods in case of an - * unrecoverable failure (i.e. when the device being handled refuses to resume - * and becomes unusable) to allow us to modify the PM core in the future, so - * that it can avoid attempting to handle devices that failed to resume and - * their children. - * - * It is allowed to unregister devices while the above callbacks are being - * executed. However, a callback routine must NOT try to unregister the device - * it was called for, although it may unregister children of that device (for - * example, if it detects that a child was unplugged while the system was - * asleep). - * - * Refer to Documentation/power/admin-guide/devices.rst for more information about the role - * of the above callbacks in the system suspend process. - * - * There also are callbacks related to runtime power management of devices. - * Again, these callbacks are executed by the PM core only for subsystems - * (PM domains, device types, classes and bus types) and the subsystem-level - * callbacks are supposed to invoke the driver callbacks. Moreover, the exact - * actions to be performed by a device driver's callbacks generally depend on - * the platform and subsystem the device belongs to. - * * @runtime_suspend: Prepare the device for a condition in which it won't be * able to communicate with the CPU(s) and RAM due to power management. * This need not mean that the device should be put into a low-power state. @@ -287,11 +242,54 @@ typedef struct pm_message { * Check these conditions, and return 0 if it's appropriate to let the PM * core queue a suspend request for the device. * - * Refer to Documentation/power/runtime_pm.txt for more information about the - * role of the above callbacks in device runtime power management. + * Several device power state transitions are externally visible, affecting + * the state of pending I/O queues and (for drivers that touch hardware) + * interrupts, wakeups, DMA, and other hardware state. There may also be + * internal transitions to various low-power modes which are transparent + * to the rest of the driver stack (such as a driver that's ON gating off + * clocks which are not in active use). * + * The externally visible transitions are handled with the help of callbacks + * included in this structure in such a way that, typically, two levels of + * callbacks are involved. First, the PM core executes callbacks provided by PM + * domains, device types, classes and bus types. They are the subsystem-level + * callbacks expected to execute callbacks provided by device drivers, although + * they may choose not to do that. If the driver callbacks are executed, they + * have to collaborate with the subsystem-level callbacks to achieve the goals + * appropriate for the given system transition, given transition phase and the + * subsystem the device belongs to. + * + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by @resume(), @thaw(), @restore(), + * @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do not cause the PM + * core to abort the resume transition during which they are returned. The + * error codes returned in those cases are only printed to the system logs for + * debugging purposes. Still, it is recommended that drivers only return error + * codes from their resume methods in case of an unrecoverable failure (i.e. + * when the device being handled refuses to resume and becomes unusable) to + * allow the PM core to be modified in the future, so that it can avoid + * attempting to handle devices that failed to resume and their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, a callback routine MUST NOT try to unregister the device + * it was called for, although it may unregister children of that device (for + * example, if it detects that a child was unplugged while the system was + * asleep). + * + * Refer to Documentation/power/devices.txt for more information about the role + * of the above callbacks in the system suspend process. + * + * There also are callbacks related to runtime power management of devices. + * Again, as a rule these callbacks are executed by the PM core for subsystems + * (PM domains, device types, classes and bus types) and the subsystem-level + * callbacks are expected to invoke the driver callbacks. Moreover, the exact + * actions to be performed by a device driver's callbacks generally depend on + * the platform and subsystem the device belongs to. + * + * Refer to Documentation/power/runtime_pm.txt for more information about the + * role of the @runtime_suspend(), @runtime_resume() and @runtime_idle() + * callbacks in device runtime power management. */ - struct dev_pm_ops { int (*prepare)(struct device *dev); void (*complete)(struct device *dev); @@ -391,7 +389,7 @@ const struct dev_pm_ops name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -/** +/* * PM_EVENT_ messages * * The following PM_EVENT_ messages are defined for the internal use of the PM @@ -487,7 +485,7 @@ const struct dev_pm_ops name = { \ #define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) -/** +/* * Device run-time power management status. * * These status labels are used internally by the PM core to indicate the @@ -517,7 +515,7 @@ enum rpm_status { RPM_SUSPENDING, }; -/** +/* * Device run-time power management request types. * * RPM_REQ_NONE Do nothing. @@ -616,15 +614,18 @@ extern void update_pm_runtime_accounting(struct device *dev); extern int dev_pm_get_subsys_data(struct device *dev); extern void dev_pm_put_subsys_data(struct device *dev); -/* - * Power domains provide callbacks that are executed during system suspend, - * hibernation, system resume and during runtime PM transitions along with - * subsystem-level and driver-level callbacks. +/** + * struct dev_pm_domain - power management domain representation. * + * @ops: Power management operations associated with this domain. * @detach: Called when removing a device from the domain. * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. * @dismiss: Called after unsuccessful driver probe and after driver removal. + * + * Power domains provide callbacks that are executed during system suspend, + * hibernation, system resume and during runtime PM transitions instead of + * subsystem-level and driver-level callbacks. */ struct dev_pm_domain { struct dev_pm_ops ops; -- cgit v1.2.3 From 2728b2d2e5be4b828a523a06089cd605419fc65c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Feb 2017 01:32:13 +0100 Subject: PM / core / docs: Convert sleep states API document to reST Move the document describing the system sleep state transitions API for devices to Documentation/driver-api/pm/, convert it to reST and update it to use current terminology. Also remove the remaining reference to the old version of it from pm.h. The new document still contains references to some documents in the .txt format that will be converted later. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jonathan Corbet --- include/linux/pm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 10867b11d503..a0894bc52bb4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -276,9 +276,6 @@ typedef struct pm_message { * example, if it detects that a child was unplugged while the system was * asleep). * - * Refer to Documentation/power/devices.txt for more information about the role - * of the above callbacks in the system suspend process. - * * There also are callbacks related to runtime power management of devices. * Again, as a rule these callbacks are executed by the PM core for subsystems * (PM domains, device types, classes and bus types) and the subsystem-level -- cgit v1.2.3 From 88e30752dd47f0a6398cd014af82332f1b9873ea Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 30 Jan 2017 09:00:07 -0800 Subject: rpmsg: qcom: smd: Return positively when not enabled Remoteproc treats the error codes returned from the stubbed SMD API as errors, but the fact that SMD is not enabled should not affect remoteproc's ability to start the remote processors. Signed-off-by: Bjorn Andersson --- include/linux/rpmsg/qcom_smd.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg/qcom_smd.h b/include/linux/rpmsg/qcom_smd.h index e674b2e3074b..8ec8b6439b25 100644 --- a/include/linux/rpmsg/qcom_smd.h +++ b/include/linux/rpmsg/qcom_smd.h @@ -18,14 +18,12 @@ static inline struct qcom_smd_edge * qcom_smd_register_edge(struct device *parent, struct device_node *node) { - return ERR_PTR(-ENXIO); + return NULL; } static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge) { - /* This shouldn't be possible */ - WARN_ON(1); - return -ENXIO; + return 0; } #endif -- cgit v1.2.3 From 1f318a8bafcfba9f0d623f4870c4e890fd22e659 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Feb 2017 18:00:14 +0100 Subject: modules: mark __inittest/__exittest as __maybe_unused clang warns about unused inline functions by default: arch/arm/crypto/aes-cipher-glue.c:68:1: warning: unused function '__inittest' [-Wunused-function] arch/arm/crypto/aes-cipher-glue.c:69:1: warning: unused function '__exittest' [-Wunused-function] As these appear in every single module, let's just disable the warnings by marking the two functions as __maybe_unused. Signed-off-by: Arnd Bergmann Reviewed-by: Miroslav Benes Acked-by: Rusty Russell Signed-off-by: Jessica Yu --- include/linux/module.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index ef599379f9a4..4e09f469bd7d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -127,13 +127,13 @@ extern void cleanup_module(void); /* Each module must use one module_init(). */ #define module_init(initfn) \ - static inline initcall_t __inittest(void) \ + static inline initcall_t __maybe_unused __inittest(void) \ { return initfn; } \ int init_module(void) __attribute__((alias(#initfn))); /* This is only required if you want to be unloadable. */ #define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ + static inline exitcall_t __maybe_unused __exittest(void) \ { return exitfn; } \ void cleanup_module(void) __attribute__((alias(#exitfn))); -- cgit v1.2.3 From b6a05c823fc573a65efc4466f174abf05f922e0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jan 2017 13:18:58 +0100 Subject: scsi: remove eh_timed_out methods in the transport template Instead define the timeout behavior purely based on the host_template eh_timed_out method and wire up the existing transport implementations in the host templates. This also clears up the confusion that the transport template method overrides the host template one, so some drivers have to re-override the transport template one. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c170be548b7f..46e18c0619c6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1130,6 +1130,7 @@ extern int ata_sas_port_start(struct ata_port *ap); extern void ata_sas_port_stop(struct ata_port *ap); extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); +extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); @@ -1355,6 +1356,7 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ + .eh_timed_out = ata_scsi_timed_out, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity, \ .sdev_attrs = ata_common_sdev_attrs -- cgit v1.2.3 From 46f47e48008b63f5fd3a3bad8b79ba1a89fb625f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 24 Jan 2017 10:58:06 -0800 Subject: fscrypt: split supp and notsupp declarations into their own headers Previously, each filesystem configured without encryption support would define all the public fscrypt functions to their notsupp_* stubs. This list of #defines had to be updated in every filesystem whenever a change was made to the public fscrypt functions. To make things more maintainable now that we have three filesystems using fscrypt, split the old header fscrypto.h into several new headers. fscrypt_supp.h contains the real declarations and is included by filesystems when configured with encryption support, whereas fscrypt_notsupp.h contains the inline stubs and is included by filesystems when configured without encryption support. fscrypt_common.h contains common declarations needed by both. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 146 +++++++++++++++++ include/linux/fscrypt_notsupp.h | 168 +++++++++++++++++++ include/linux/fscrypt_supp.h | 66 ++++++++ include/linux/fscrypto.h | 347 ---------------------------------------- 4 files changed, 380 insertions(+), 347 deletions(-) create mode 100644 include/linux/fscrypt_common.h create mode 100644 include/linux/fscrypt_notsupp.h create mode 100644 include/linux/fscrypt_supp.h delete mode 100644 include/linux/fscrypto.h (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h new file mode 100644 index 000000000000..547f81592ba1 --- /dev/null +++ b/include/linux/fscrypt_common.h @@ -0,0 +1,146 @@ +/* + * fscrypt_common.h: common declarations for per-file encryption + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ + +#ifndef _LINUX_FSCRYPT_COMMON_H +#define _LINUX_FSCRYPT_COMMON_H + +#include +#include +#include +#include +#include +#include +#include + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_info; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + +/** + * This function is used to calculate the disk space required to + * store a filename of length l in encrypted symlink format. + */ +static inline u32 fscrypt_symlink_data_len(u32 l) +{ + if (l < FS_CRYPTO_BLOCK_SIZE) + l = FS_CRYPTO_BLOCK_SIZE; + return (l + sizeof(struct fscrypt_symlink_data) - 1); +} + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto opertions for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*prepare_context)(struct inode *); + int (*set_context)(struct inode *, const void *, size_t, void *); + int (*dummy_context)(struct inode *); + bool (*is_encrypted)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + if (inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode)) + return true; + return false; +} + +static inline bool fscrypt_valid_contents_enc_mode(u32 mode) +{ + return (mode == FS_ENCRYPTION_MODE_AES_256_XTS); +} + +static inline bool fscrypt_valid_filenames_enc_mode(u32 mode) +{ + return (mode == FS_ENCRYPTION_MODE_AES_256_CTS); +} + +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + +static inline struct page *fscrypt_control_page(struct page *page) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +#else + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +#endif +} + +static inline int fscrypt_has_encryption_key(const struct inode *inode) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + return (inode->i_crypt_info != NULL); +#else + return 0; +#endif +} + +#endif /* _LINUX_FSCRYPT_COMMON_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h new file mode 100644 index 000000000000..3511ca798804 --- /dev/null +++ b/include/linux/fscrypt_notsupp.h @@ -0,0 +1,168 @@ +/* + * fscrypt_notsupp.h + * + * This stubs out the fscrypt functions for filesystems configured without + * encryption support. + */ + +#ifndef _LINUX_FSCRYPT_NOTSUPP_H +#define _LINUX_FSCRYPT_NOTSUPP_H + +#include + +/* crypto.c */ +static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, + gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) +{ + return; +} + +static inline struct page *fscrypt_encrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, + unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int fscrypt_decrypt_page(const struct inode *inode, + struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return -EOPNOTSUPP; +} + + +static inline void fscrypt_restore_control_page(struct page *page) +{ + return; +} + +static inline void fscrypt_set_d_op(struct dentry *dentry) +{ + return; +} + +static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) +{ + return; +} + +/* policy.c */ +static inline int fscrypt_ioctl_set_policy(struct file *filp, + const void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_has_permitted_context(struct inode *parent, + struct inode *child) +{ + return 0; +} + +static inline int fscrypt_inherit_context(struct inode *parent, + struct inode *child, + void *fs_data, bool preload) +{ + return -EOPNOTSUPP; +} + +/* keyinfo.c */ +static inline int fscrypt_get_encryption_info(struct inode *inode) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_put_encryption_info(struct inode *inode, + struct fscrypt_info *ci) +{ + return; +} + + /* fname.c */ +static inline int fscrypt_setup_filename(struct inode *dir, + const struct qstr *iname, + int lookup, struct fscrypt_name *fname) +{ + if (dir->i_sb->s_cop->is_encrypted(dir)) + return -EOPNOTSUPP; + + memset(fname, 0, sizeof(struct fscrypt_name)); + fname->usr_fname = iname; + fname->disk_name.name = (unsigned char *)iname->name; + fname->disk_name.len = iname->len; + return 0; +} + +static inline void fscrypt_free_filename(struct fscrypt_name *fname) +{ + return; +} + +static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, + u32 ilen) +{ + /* never happens */ + WARN_ON(1); + return 0; +} + +static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, + u32 ilen, + struct fscrypt_str *crypto_str) +{ + return -EOPNOTSUPP; +} + +static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) +{ + return; +} + +static inline int fscrypt_fname_disk_to_usr(struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +static inline int fscrypt_fname_usr_to_disk(struct inode *inode, + const struct qstr *iname, + struct fscrypt_str *oname) +{ + return -EOPNOTSUPP; +} + +/* bio.c */ +static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, + struct bio *bio) +{ + return; +} + +static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) +{ + return; +} + +static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + return -EOPNOTSUPP; +} + +#endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h new file mode 100644 index 000000000000..a140f47e9b27 --- /dev/null +++ b/include/linux/fscrypt_supp.h @@ -0,0 +1,66 @@ +/* + * fscrypt_supp.h + * + * This is included by filesystems configured with encryption support. + */ + +#ifndef _LINUX_FSCRYPT_SUPP_H +#define _LINUX_FSCRYPT_SUPP_H + +#include + +/* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; +extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); +extern void fscrypt_release_ctx(struct fscrypt_ctx *); +extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, + unsigned int, unsigned int, + u64, gfp_t); +extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, + unsigned int, u64); +extern void fscrypt_restore_control_page(struct page *); + +extern const struct dentry_operations fscrypt_d_ops; + +static inline void fscrypt_set_d_op(struct dentry *dentry) +{ + d_set_d_op(dentry, &fscrypt_d_ops); +} + +static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + spin_unlock(&dentry->d_lock); +} + +/* policy.c */ +extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); +extern int fscrypt_ioctl_get_policy(struct file *, void __user *); +extern int fscrypt_has_permitted_context(struct inode *, struct inode *); +extern int fscrypt_inherit_context(struct inode *, struct inode *, + void *, bool); +/* keyinfo.c */ +extern int fscrypt_get_encryption_info(struct inode *); +extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); + +/* fname.c */ +extern int fscrypt_setup_filename(struct inode *, const struct qstr *, + int lookup, struct fscrypt_name *); +extern void fscrypt_free_filename(struct fscrypt_name *); +extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); +extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, + struct fscrypt_str *); +extern void fscrypt_fname_free_buffer(struct fscrypt_str *); +extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, + const struct fscrypt_str *, struct fscrypt_str *); +extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, + struct fscrypt_str *); + +/* bio.c */ +extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_pullback_bio_page(struct page **, bool); +extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, + unsigned int); + +#endif /* _LINUX_FSCRYPT_SUPP_H */ diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h deleted file mode 100644 index 715f17b3c6d7..000000000000 --- a/include/linux/fscrypto.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * General per-file encryption definition - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPTO_H -#define _LINUX_FSCRYPTO_H - -#include -#include -#include -#include -#include -#include -#include - -#define FS_CRYPTO_BLOCK_SIZE 16 - -struct fscrypt_info; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. - */ -static inline u32 fscrypt_symlink_data_len(u32 l) -{ - if (l < FS_CRYPTO_BLOCK_SIZE) - l = FS_CRYPTO_BLOCK_SIZE; - return (l + sizeof(struct fscrypt_symlink_data) - 1); -} - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*prepare_context)(struct inode *); - int (*set_context)(struct inode *, const void *, size_t, void *); - int (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_contents_enc_mode(u32 mode) -{ - return (mode == FS_ENCRYPTION_MODE_AES_256_XTS); -} - -static inline bool fscrypt_valid_filenames_enc_mode(u32 mode) -{ - return (mode == FS_ENCRYPTION_MODE_AES_256_CTS); -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(const struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; - spin_unlock(&dentry->d_lock); -#endif -} - -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) -extern const struct dentry_operations fscrypt_d_ops; -#endif - -static inline void fscrypt_set_d_op(struct dentry *dentry) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - d_set_d_op(dentry, &fscrypt_d_ops); -#endif -} - -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) -/* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; -extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, - unsigned int, unsigned int, - u64, gfp_t); -extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, - unsigned int, u64); -extern void fscrypt_restore_control_page(struct page *); - -/* policy.c */ -extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); -extern int fscrypt_ioctl_get_policy(struct file *, void __user *); -extern int fscrypt_has_permitted_context(struct inode *, struct inode *); -extern int fscrypt_inherit_context(struct inode *, struct inode *, - void *, bool); -/* keyinfo.c */ -extern int fscrypt_get_encryption_info(struct inode *); -extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *); - -/* fname.c */ -extern int fscrypt_setup_filename(struct inode *, const struct qstr *, - int lookup, struct fscrypt_name *); -extern void fscrypt_free_filename(struct fscrypt_name *); -extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); -extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, - struct fscrypt_str *); -extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); - -/* bio.c */ -extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); -extern void fscrypt_pullback_bio_page(struct page **, bool); -extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, - unsigned int); -#endif - -/* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(const struct inode *i, - gfp_t f) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) -{ - return; -} - -static inline struct page *fscrypt_notsupp_encrypt_page(const struct inode *i, - struct page *p, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t f) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline int fscrypt_notsupp_decrypt_page(const struct inode *i, struct page *p, - unsigned int len, unsigned int offs, - u64 lblk_num) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c, - struct bio *b) -{ - return; -} - -static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b) -{ - return; -} - -static inline void fscrypt_notsupp_restore_control_page(struct page *p) -{ - return; -} - -static inline int fscrypt_notsupp_zeroout_range(const struct inode *i, pgoff_t p, - sector_t s, unsigned int f) -{ - return -EOPNOTSUPP; -} - -/* policy.c */ -static inline int fscrypt_notsupp_ioctl_set_policy(struct file *f, - const void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_ioctl_get_policy(struct file *f, - void __user *arg) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_has_permitted_context(struct inode *p, - struct inode *i) -{ - return 0; -} - -static inline int fscrypt_notsupp_inherit_context(struct inode *p, - struct inode *i, void *v, bool b) -{ - return -EOPNOTSUPP; -} - -/* keyinfo.c */ -static inline int fscrypt_notsupp_get_encryption_info(struct inode *i) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_put_encryption_info(struct inode *i, - struct fscrypt_info *f) -{ - return; -} - - /* fname.c */ -static inline int fscrypt_notsupp_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct fscrypt_name *fname) -{ - if (dir->i_sb->s_cop->is_encrypted(dir)) - return -EOPNOTSUPP; - - memset(fname, 0, sizeof(struct fscrypt_name)); - fname->usr_fname = iname; - fname->disk_name.name = (unsigned char *)iname->name; - fname->disk_name.len = iname->len; - return 0; -} - -static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname) -{ - return; -} - -static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - -static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) -{ - return -EOPNOTSUPP; -} - -static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c) -{ - return; -} - -static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - -static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} -#endif /* _LINUX_FSCRYPTO_H */ -- cgit v1.2.3 From e58910cdc9f43cda2e52fcdf2fddbdc74e80b2f7 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 6 Feb 2017 11:22:42 +0000 Subject: efi: Add SHIM and image security database GUID definitions Add the definitions for shim and image security database, both of which are used widely in various Linux distros. Signed-off-by: Josh Boyer Signed-off-by: David Howells Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-4-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 85e9fdaa8d07..d00538a65899 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -611,6 +611,9 @@ void efi_native_runtime_setup(void); #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) +#define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) +#define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) + /* * This GUID is used to pass to the kernel proper the struct screen_info * structure that was populated by the stub based on the GOP protocol instance -- cgit v1.2.3 From de8cb458625c164bb3f93c4e415e479afce8fa9d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Feb 2017 11:22:43 +0000 Subject: efi: Get and store the secure boot status Get the firmware's secure-boot status in the kernel boot wrapper and stash it somewhere that the main kernel image can find. The efi_get_secureboot() function is extracted from the ARM stub and (a) generalised so that it can be called from x86 and (b) made to use efi_call_runtime() so that it can be run in mixed-mode. For x86, it is stored in boot_params and can be overridden by the boot loader or kexec. This allows secure-boot mode to be passed on to a new kernel. Suggested-by: Lukas Wunner Signed-off-by: David Howells Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-5-git-send-email-ard.biesheuvel@linaro.org [ Small readability edits. ] Signed-off-by: Ingo Molnar --- include/linux/efi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d00538a65899..94d34e0be24f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1480,6 +1480,14 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, bool efi_runtime_disabled(void); extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +enum efi_secureboot_mode { + efi_secureboot_mode_unset, + efi_secureboot_mode_unknown, + efi_secureboot_mode_disabled, + efi_secureboot_mode_enabled, +}; +enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); + /* * Arch code can implement the following three template macros, avoiding * reptition for the void/non-void return cases of {__,}efi_call_virt(): -- cgit v1.2.3 From 4025819d328cd0efc53ee22e01b800631944b7f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 11:58:42 +0100 Subject: delayacct: Include include/linux/delayacct.h relies on 'struct taskstats' but does not include the header that defines it. This worked so far because files that included also happened to include other headers that included uapi/linux/taskstats.h. Fix it. Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6cee17c22313..00e60f79a9cc 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -17,6 +17,7 @@ #ifndef _LINUX_DELAYACCT_H #define _LINUX_DELAYACCT_H +#include #include #include -- cgit v1.2.3 From bec84da8d1da6677c458e6eedd8e814eea91b9fc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 2 Feb 2017 17:41:25 -0800 Subject: device property: allow to constify properties There is no reason why statically defined properties should be modifiable, so let's make device_add_properties() and the rest of pset_*() functions to take const pointers to properties. This will allow us to mark properties as const/__initconst at definition sites. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 856e50b2140c..d37a4498b3ac 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -242,7 +242,7 @@ struct property_entry { } int device_add_properties(struct device *dev, - struct property_entry *properties); + const struct property_entry *properties); void device_remove_properties(struct device *dev); bool device_dma_supported(struct device *dev); -- cgit v1.2.3 From 9426998ce6f8616c48c2834cafbe5616da3f5abd Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 2 Feb 2017 17:41:26 -0800 Subject: device property: constify property arrays values Data that is fed into property arrays should not be modified, so let's mark relevant pointers as const. This will allow us making source arrays as const/__initconst. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index d37a4498b3ac..7a0a1cce5165 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -160,12 +160,12 @@ struct property_entry { bool is_string; union { union { - void *raw_data; - u8 *u8_data; - u16 *u16_data; - u32 *u32_data; - u64 *u64_data; - const char **str; + const void *raw_data; + const u8 *u8_data; + const u16 *u16_data; + const u32 *u32_data; + const u64 *u64_data; + const char * const *str; } pointer; union { unsigned long long raw_data; -- cgit v1.2.3 From 2d479e1fa2d09c5a9518a75a5d21ef2713117946 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 2 Feb 2017 17:41:27 -0800 Subject: device property: export code duplicating array of property entries When augmenting ACPI-enumerated devices with additional property data based on DMI info, a module has often several potential property sets, with only one being active on a given box. In order to save memory it should be possible to mark everything and __initdata or __initconst, execute DMI match early, and duplicate relevant properties. Then kernel will discard the rest of them. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 7a0a1cce5165..64e3a9c6d95f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -241,6 +241,11 @@ struct property_entry { .name = _name_, \ } +struct property_entry * +property_entries_dup(const struct property_entry *properties); + +void property_entries_free(const struct property_entry *properties); + int device_add_properties(struct device *dev, const struct property_entry *properties); void device_remove_properties(struct device *dev); -- cgit v1.2.3 From febf2407418a2d6c042fcd77b206040449cb9a70 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 6 Feb 2017 18:01:51 +0100 Subject: x86/ACPI: keep x86_cpu_to_acpiid mapping valid on CPU hotplug We may or may not have all possible CPUs in MADT on boot but in any case we're overwriting x86_cpu_to_acpiid mapping with U32_MAX when acpi_register_lapic() is called again on the CPU hotplug path: acpi_processor_hotadd_init() -> acpi_map_cpu() -> acpi_register_lapic() As we have the required acpi_id information in acpi_processor_hotadd_init() propagate it to acpi_map_cpu() to always keep x86_cpu_to_acpiid mapping valid. Reported-by: Andrew Jones Signed-off-by: Vitaly Kuznetsov Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5b36974ed60a..6ab47e92c65a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -291,7 +291,8 @@ bool acpi_processor_validate_proc_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu); int acpi_unmap_cpu(int cpu); int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -- cgit v1.2.3 From af7bd4dc13093bf1477f370722bbab24cf457b91 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:52 +0200 Subject: vfs: create vfs helper vfs_tmpfile() Factor out some common vfs bits from do_tmpfile() to be used by overlayfs for concurrent copy up. Signed-off-by: Amir Goldstein Cc: Al Viro Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba074328894..4a7f3cc9edab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ -- cgit v1.2.3 From bfe219d373cadab761373aeea4c70406bc27ea2c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:57 +0200 Subject: vfs: wrap write f_ops with file_{start,end}_write() Before calling write f_ops, call file_start_write() instead of sb_start_write(). Replace {sb,file}_start_write() for {copy,clone}_file_range() and for fallocate(). Beyond correct semantics, this avoids freeze protection to sb when operating on special inodes, such as fallocate() on a blockdev. Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a7f3cc9edab..78c81e6f5d76 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,19 +1741,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2564,6 +2551,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. -- cgit v1.2.3 From 9fe7bfce8b3e112e8e08c40deb72ee7e24c6f072 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 2 Feb 2017 19:16:01 -0800 Subject: virtio_net: refactor freeze/restore logic into virtnet reset logic For XDP we will need to reset the queues to allow for buffer headroom to be configured. In order to do this we need to essentially run the freeze()/restore() code path. Unfortunately the locking requirements between the freeze/restore and reset paths are different however so we can not simply reuse the code. This patch refactors the code path and adds a reset helper routine. Signed-off-by: John Fastabend Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index d5eb5479a425..04b0d3f95043 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -132,12 +132,16 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev) return container_of(_dev, struct virtio_device, dev); } +void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); void unregister_virtio_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); +void virtio_config_disable(struct virtio_device *dev); +void virtio_config_enable(struct virtio_device *dev); +int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); -- cgit v1.2.3 From 55601a880690cdeccdb5923c2493f0e3736f8f6b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 4 Feb 2017 20:02:49 +0100 Subject: net: phy: Add 2000base-x, 2500base-x and rxaui modes The mv88e6390 ports 9 and 10 supports some additional PHY modes. Add these modes to the PHY core so they can be used in the binding. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 43474f39ef65..28ae9eafec19 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -81,6 +81,9 @@ typedef enum { PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, + PHY_INTERFACE_MODE_1000BASEX, + PHY_INTERFACE_MODE_2500BASEX, + PHY_INTERFACE_MODE_RXAUI, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -141,6 +144,12 @@ static inline const char *phy_modes(phy_interface_t interface) return "qsgmii"; case PHY_INTERFACE_MODE_TRGMII: return "trgmii"; + case PHY_INTERFACE_MODE_1000BASEX: + return "1000base-x"; + case PHY_INTERFACE_MODE_2500BASEX: + return "2500base-x"; + case PHY_INTERFACE_MODE_RXAUI: + return "rxaui"; default: return "unknown"; } -- cgit v1.2.3 From 648ea0134069cda7d4940f397bcc6901fb88752a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 4 Feb 2017 13:02:44 -0800 Subject: net: phy: Allow pre-declaration of MDIO devices Allow board support code to collect pre-declarations for MDIO devices by registering them with mdiobus_register_board_info(). SPI and I2C buses have a similar feature, we were missing this for MDIO devices, but this is particularly useful for e.g: MDIO-connected switches which need to provide their port layout (often board-specific) to a MDIO Ethernet switch driver. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio.h | 3 +++ include/linux/mod_devicetable.h | 1 + include/linux/phy.h | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 55a80d73cfc1..ca08ab16ecdc 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -10,6 +10,7 @@ #define __LINUX_MDIO_H__ #include +#include struct mii_bus; @@ -29,6 +30,7 @@ struct mdio_device { const struct dev_pm_ops *pm_ops; struct mii_bus *bus; + char modalias[MDIO_NAME_SIZE]; int (*bus_match)(struct device *dev, struct device_driver *drv); void (*device_free)(struct mdio_device *mdiodev); @@ -71,6 +73,7 @@ int mdio_device_register(struct mdio_device *mdiodev); void mdio_device_remove(struct mdio_device *mdiodev); int mdio_driver_register(struct mdio_driver *drv); void mdio_driver_unregister(struct mdio_driver *drv); +int mdio_device_bus_match(struct device *dev, struct device_driver *drv); static inline bool mdio_phy_id_is_c45(int phy_id) { diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8a57f0b1242d..8850fcaf50db 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -501,6 +501,7 @@ struct platform_device_id { kernel_ulong_t driver_data; }; +#define MDIO_NAME_SIZE 32 #define MDIO_MODULE_PREFIX "mdio:" #define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" diff --git a/include/linux/phy.h b/include/linux/phy.h index 28ae9eafec19..d9bdf53e0514 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -886,6 +886,25 @@ void mdio_bus_exit(void); extern struct bus_type mdio_bus_type; +struct mdio_board_info { + const char *bus_id; + char modalias[MDIO_NAME_SIZE]; + int mdio_addr; + const void *platform_data; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int mdiobus_register_board_info(const struct mdio_board_info *info, + unsigned int n); +#else +static inline int mdiobus_register_board_info(const struct mdio_board_info *i, + unsigned int n) +{ + return 0; +} +#endif + + /** * module_phy_driver() - Helper macro for registering PHY drivers * @__phy_drivers: array of PHY drivers to register -- cgit v1.2.3 From b08d46b01e995dd7b653b22d35bd1d958d6ee9b4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 6 Feb 2017 13:01:16 -0800 Subject: net: phy: bcm7xxx: Add BCM74371 PHY ID Add the BCM74371 PHY ID to the list of supported chips. This is a 28nm technology Gigabit PHY SoC. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 5881d1fdc1fb..55e517130311 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -33,6 +33,7 @@ #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 +#define PHY_ID_BCM74371 0xae0252e0 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 -- cgit v1.2.3 From 4ff0620354f2b39b9fe2a91c22c4de9d1fba0c8e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 6 Feb 2017 23:14:12 +0200 Subject: net: add dst_pending_confirm flag to skbuff Add new skbuff flag to allow protocols to confirm neighbour. When same struct dst_entry can be used for many different neighbours we can not use it for pending confirmations. Add sock_confirm_neigh() helper to confirm the neighbour and use it for IPv4, IPv6 and VRF before dst_neigh_output. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c6a78e1892b6..f1adddc1c5ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS + * @dst_pending_confirm: need to confirm neighbour * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking * @mark: Generic packet mark @@ -741,6 +742,7 @@ struct sk_buff { __u8 csum_level:2; __u8 csum_bad:1; + __u8 dst_pending_confirm:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif @@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) return skb->queue_mapping != 0; } +static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val) +{ + skb->dst_pending_confirm = val; +} + +static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb) +{ + return skb->dst_pending_confirm != 0; +} + static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { #ifdef CONFIG_XFRM -- cgit v1.2.3 From 85c727b5948344c8d559e2fda8925e9ddd41c29a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 7 Feb 2017 11:37:56 -0200 Subject: sctp: drop __packed from almost all SCTP structures __packed is considered harmful as it potentially generates code that doesn't perform well and its usage should be avoided as much as possible. This patch drops __packed from all SCTP structures except one, which is sctp_signed_cookie. In there it's required, as per changelog on commit 9834a2bb4970 ("[SCTP]: Fix sctp_cookie alignment in the packet."). After this patch, no alignment changes neither in x86 or x86_64 and no exceptions were noticed during testing on both archs. Code size for SCTP module also didn't change with this patch. Cc: David Miller Cc: David Laight Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/sctp.h | 80 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a9e790685af3..2408c6877ca0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -62,7 +62,7 @@ typedef struct sctphdr { __be16 dest; __be32 vtag; __le32 checksum; -} __packed sctp_sctphdr_t; +} sctp_sctphdr_t; static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { @@ -74,7 +74,7 @@ typedef struct sctp_chunkhdr { __u8 type; __u8 flags; __be16 length; -} __packed sctp_chunkhdr_t; +} sctp_chunkhdr_t; /* Section 3.2. Chunk Type Values. @@ -165,7 +165,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 }; typedef struct sctp_paramhdr { __be16 type; __be16 length; -} __packed sctp_paramhdr_t; +} sctp_paramhdr_t; typedef enum { @@ -233,12 +233,12 @@ typedef struct sctp_datahdr { __be16 ssn; __be32 ppid; __u8 payload[0]; -} __packed sctp_datahdr_t; +} sctp_datahdr_t; typedef struct sctp_data_chunk { sctp_chunkhdr_t chunk_hdr; sctp_datahdr_t data_hdr; -} __packed sctp_data_chunk_t; +} sctp_data_chunk_t; /* DATA Chuck Specific Flags */ enum { @@ -264,78 +264,78 @@ typedef struct sctp_inithdr { __be16 num_inbound_streams; __be32 initial_tsn; __u8 params[0]; -} __packed sctp_inithdr_t; +} sctp_inithdr_t; typedef struct sctp_init_chunk { sctp_chunkhdr_t chunk_hdr; sctp_inithdr_t init_hdr; -} __packed sctp_init_chunk_t; +} sctp_init_chunk_t; /* Section 3.3.2.1. IPv4 Address Parameter (5) */ typedef struct sctp_ipv4addr_param { sctp_paramhdr_t param_hdr; struct in_addr addr; -} __packed sctp_ipv4addr_param_t; +} sctp_ipv4addr_param_t; /* Section 3.3.2.1. IPv6 Address Parameter (6) */ typedef struct sctp_ipv6addr_param { sctp_paramhdr_t param_hdr; struct in6_addr addr; -} __packed sctp_ipv6addr_param_t; +} sctp_ipv6addr_param_t; /* Section 3.3.2.1 Cookie Preservative (9) */ typedef struct sctp_cookie_preserve_param { sctp_paramhdr_t param_hdr; __be32 lifespan_increment; -} __packed sctp_cookie_preserve_param_t; +} sctp_cookie_preserve_param_t; /* Section 3.3.2.1 Host Name Address (11) */ typedef struct sctp_hostname_param { sctp_paramhdr_t param_hdr; uint8_t hostname[0]; -} __packed sctp_hostname_param_t; +} sctp_hostname_param_t; /* Section 3.3.2.1 Supported Address Types (12) */ typedef struct sctp_supported_addrs_param { sctp_paramhdr_t param_hdr; __be16 types[0]; -} __packed sctp_supported_addrs_param_t; +} sctp_supported_addrs_param_t; /* Appendix A. ECN Capable (32768) */ typedef struct sctp_ecn_capable_param { sctp_paramhdr_t param_hdr; -} __packed sctp_ecn_capable_param_t; +} sctp_ecn_capable_param_t; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ typedef struct sctp_adaptation_ind_param { struct sctp_paramhdr param_hdr; __be32 adaptation_ind; -} __packed sctp_adaptation_ind_param_t; +} sctp_adaptation_ind_param_t; /* ADDIP Section 4.2.7 Supported Extensions Parameter */ typedef struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; __u8 chunks[0]; -} __packed sctp_supported_ext_param_t; +} sctp_supported_ext_param_t; /* AUTH Section 3.1 Random */ typedef struct sctp_random_param { sctp_paramhdr_t param_hdr; __u8 random_val[0]; -} __packed sctp_random_param_t; +} sctp_random_param_t; /* AUTH Section 3.2 Chunk List */ typedef struct sctp_chunks_param { sctp_paramhdr_t param_hdr; __u8 chunks[0]; -} __packed sctp_chunks_param_t; +} sctp_chunks_param_t; /* AUTH Section 3.3 HMAC Algorithm */ typedef struct sctp_hmac_algo_param { sctp_paramhdr_t param_hdr; __be16 hmac_ids[0]; -} __packed sctp_hmac_algo_param_t; +} sctp_hmac_algo_param_t; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): * The INIT ACK chunk is used to acknowledge the initiation of an SCTP @@ -347,13 +347,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t; typedef struct sctp_cookie_param { sctp_paramhdr_t p; __u8 body[0]; -} __packed sctp_cookie_param_t; +} sctp_cookie_param_t; /* Section 3.3.3.1 Unrecognized Parameters (8) */ typedef struct sctp_unrecognized_param { sctp_paramhdr_t param_hdr; sctp_paramhdr_t unrecognized; -} __packed sctp_unrecognized_param_t; +} sctp_unrecognized_param_t; @@ -368,7 +368,7 @@ typedef struct sctp_unrecognized_param { typedef struct sctp_gap_ack_block { __be16 start; __be16 end; -} __packed sctp_gap_ack_block_t; +} sctp_gap_ack_block_t; typedef __be32 sctp_dup_tsn_t; @@ -383,12 +383,12 @@ typedef struct sctp_sackhdr { __be16 num_gap_ack_blocks; __be16 num_dup_tsns; sctp_sack_variable_t variable[0]; -} __packed sctp_sackhdr_t; +} sctp_sackhdr_t; typedef struct sctp_sack_chunk { sctp_chunkhdr_t chunk_hdr; sctp_sackhdr_t sack_hdr; -} __packed sctp_sack_chunk_t; +} sctp_sack_chunk_t; /* RFC 2960. Section 3.3.5 Heartbeat Request (HEARTBEAT) (4): @@ -400,12 +400,12 @@ typedef struct sctp_sack_chunk { typedef struct sctp_heartbeathdr { sctp_paramhdr_t info; -} __packed sctp_heartbeathdr_t; +} sctp_heartbeathdr_t; typedef struct sctp_heartbeat_chunk { sctp_chunkhdr_t chunk_hdr; sctp_heartbeathdr_t hb_hdr; -} __packed sctp_heartbeat_chunk_t; +} sctp_heartbeat_chunk_t; /* For the abort and shutdown ACK we must carry the init tag in the @@ -414,7 +414,7 @@ typedef struct sctp_heartbeat_chunk { */ typedef struct sctp_abort_chunk { sctp_chunkhdr_t uh; -} __packed sctp_abort_chunk_t; +} sctp_abort_chunk_t; /* For the graceful shutdown we must carry the tag (in common header) @@ -422,12 +422,12 @@ typedef struct sctp_abort_chunk { */ typedef struct sctp_shutdownhdr { __be32 cum_tsn_ack; -} __packed sctp_shutdownhdr_t; +} sctp_shutdownhdr_t; struct sctp_shutdown_chunk_t { sctp_chunkhdr_t chunk_hdr; sctp_shutdownhdr_t shutdown_hdr; -} __packed; +}; /* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */ @@ -435,12 +435,12 @@ typedef struct sctp_errhdr { __be16 cause; __be16 length; __u8 variable[0]; -} __packed sctp_errhdr_t; +} sctp_errhdr_t; typedef struct sctp_operr_chunk { sctp_chunkhdr_t chunk_hdr; sctp_errhdr_t err_hdr; -} __packed sctp_operr_chunk_t; +} sctp_operr_chunk_t; /* RFC 2960 3.3.10 - Operation Error * @@ -530,7 +530,7 @@ typedef struct sctp_ecnehdr { typedef struct sctp_ecne_chunk { sctp_chunkhdr_t chunk_hdr; sctp_ecnehdr_t ence_hdr; -} __packed sctp_ecne_chunk_t; +} sctp_ecne_chunk_t; /* RFC 2960. Appendix A. Explicit Congestion Notification. * Congestion Window Reduced (CWR) (13) @@ -542,7 +542,7 @@ typedef struct sctp_cwrhdr { typedef struct sctp_cwr_chunk { sctp_chunkhdr_t chunk_hdr; sctp_cwrhdr_t cwr_hdr; -} __packed sctp_cwr_chunk_t; +} sctp_cwr_chunk_t; /* PR-SCTP * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN) @@ -593,17 +593,17 @@ typedef struct sctp_cwr_chunk { struct sctp_fwdtsn_skip { __be16 stream; __be16 ssn; -} __packed; +}; struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; struct sctp_fwdtsn_skip skip[0]; -} __packed; +}; struct sctp_fwdtsn_chunk { struct sctp_chunkhdr chunk_hdr; struct sctp_fwdtsn_hdr fwdtsn_hdr; -} __packed; +}; /* ADDIP @@ -641,17 +641,17 @@ struct sctp_fwdtsn_chunk { typedef struct sctp_addip_param { sctp_paramhdr_t param_hdr; __be32 crr_id; -} __packed sctp_addip_param_t; +} sctp_addip_param_t; typedef struct sctp_addiphdr { __be32 serial; __u8 params[0]; -} __packed sctp_addiphdr_t; +} sctp_addiphdr_t; typedef struct sctp_addip_chunk { sctp_chunkhdr_t chunk_hdr; sctp_addiphdr_t addip_hdr; -} __packed sctp_addip_chunk_t; +} sctp_addip_chunk_t; /* AUTH * Section 4.1 Authentication Chunk (AUTH) @@ -706,12 +706,12 @@ typedef struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; __u8 hmac[0]; -} __packed sctp_authhdr_t; +} sctp_authhdr_t; typedef struct sctp_auth_chunk { sctp_chunkhdr_t chunk_hdr; sctp_authhdr_t auth_hdr; -} __packed sctp_auth_chunk_t; +} sctp_auth_chunk_t; struct sctp_infox { struct sctp_info *sctpinfo; -- cgit v1.2.3 From 376bc27150f180d9f5eddec6a14117780177589d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 19 Apr 2016 08:56:27 +0200 Subject: clockevents: Add a clkevt-of mechanism like clksrc-of The current code uses the CLOCKSOURCE_OF_DECLARE macro to fill the clksrc table with a t-uple (name, init_function). Unfortunately it ends up to the clockevent and the clocksource being both initialized with this macro. It is not a problem by itself but there is not a clear distinction between a clockevent and a clocksource in the code initialization path. Somebody can argue there are the same IP block and the same DT node. But conceptually from the software side, there are two distincts entities and as is they should be initialized separetely. Some drivers which do not have a clocksource end up by using the CLOCKSOURCE_OF_DECLARE macro to declare a clockevent. Another result is the fuzzy organization in the clocksource directory, where the clockevents are implemented in the same file than the clocksources or file labelled timer-something implementing a clocksource. This patch provides another macro to specifically declare a clockevent in the same way than the clocksource and gives the opportunity to write two separate drivers, one for the clocksource and another for the clockevents. Hopefully, that can help to do some housework in the directory, perhaps split the drivers in to entities, for example: - clksrc-rockchip.c - clkevt-rockchip.c Also, it gives the possibility to declare clocksources separately in the DT and then use a clocksource from IP block while while clockevents are used from another IP block. Signed-off-by: Daniel Lezcano --- include/linux/clockchips.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 0d442e34c349..5d3053c34fb3 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -224,4 +224,13 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#define CLOCKEVENT_OF_DECLARE(name, compat, fn) \ + OF_DECLARE_1_RET(clkevt, name, compat, fn) + +#ifdef CONFIG_CLKEVT_PROBE +extern int clockevent_probe(void); +#els +static inline int clockevent_probe(void) { return 0; } +#endif + #endif /* _LINUX_CLOCKCHIPS_H */ -- cgit v1.2.3 From 0f5bf6d0afe4be6e1391908ff2d6dc9730e91550 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 6 Feb 2017 16:31:58 -0800 Subject: arch: Rename CONFIG_DEBUG_RODATA and CONFIG_DEBUG_MODULE_RONX Both of these options are poorly named. The features they provide are necessary for system security and should not be considered debug only. Change the names to CONFIG_STRICT_KERNEL_RWX and CONFIG_STRICT_MODULE_RWX to better describe what these options do. Signed-off-by: Laura Abbott Acked-by: Jessica Yu Signed-off-by: Kees Cook --- include/linux/filter.h | 4 ++-- include/linux/init.h | 4 ++-- include/linux/module.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a0934e6c9bab..c6dd53e88711 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,7 +543,7 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -#ifdef CONFIG_DEBUG_SET_MODULE_RONX +#ifdef CONFIG_STRICT_MODULE_RWX static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { set_memory_ro((unsigned long)fp, fp->pages); @@ -561,7 +561,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } -#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +#endif /* CONFIG_STRICT_MODULE_RWX */ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) diff --git a/include/linux/init.h b/include/linux/init.h index 885c3e6d0f9d..79af0962fd52 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -126,10 +126,10 @@ void prepare_namespace(void); void __init load_default_modules(void); int __init init_rootfs(void); -#if defined(CONFIG_DEBUG_RODATA) || defined(CONFIG_DEBUG_SET_MODULE_RONX) +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) extern bool rodata_enabled; #endif -#ifdef CONFIG_DEBUG_RODATA +#ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void); #endif diff --git a/include/linux/module.h b/include/linux/module.h index 7c84273d60b9..d5afd142818f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -764,7 +764,7 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) -#ifdef CONFIG_DEBUG_SET_MODULE_RONX +#ifdef CONFIG_STRICT_MODULE_RWX extern void set_all_modules_text_rw(void); extern void set_all_modules_text_ro(void); extern void module_enable_ro(const struct module *mod, bool after_init); -- cgit v1.2.3 From 66cd794e3c30b8af3b6befe42a378557efb3114a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Feb 2017 22:40:44 +0200 Subject: nl80211: add HT/VHT capabilities to AP parameters For the benefit of drivers that rebuild IEs in firmware, parse the IEs for HT/VHT capabilities and the respective membership selector in the (extended) supported rates. This avoids duplicating the same code into all drivers that need this information. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 02768de209d6..0dd9498c694f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1043,8 +1043,9 @@ struct ieee80211_mgmt { } u; } __packed __aligned(2); -/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */ +/* Supported rates membership selectors */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 +#define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) -- cgit v1.2.3 From f92bac3b141b8233e34ddf32d227e12bfba07b48 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:05 +0900 Subject: printk: rename nmi.c and exported api A preparation patch for printk_safe work. No functional change. - rename nmi.c to print_safe.c - add `printk_safe' prefix to some (which used both by printk-safe and printk-nmi) of the exported functions. Link: http://lkml.kernel.org/r/20161227141611.940-3-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Steven Rostedt Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 3472cc6b7a60..37e933eeffb2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,17 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_nmi_init(void); +extern void printk_safe_init(void); extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); -extern void printk_nmi_flush(void); -extern void printk_nmi_flush_on_panic(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else -static inline void printk_nmi_init(void) { } +static inline void printk_safe_init(void) { } static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } -static inline void printk_nmi_flush(void) { } -static inline void printk_nmi_flush_on_panic(void) { } +static inline void printk_safe_flush(void) { } +static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK -- cgit v1.2.3 From 099f1c84c0052ec1b27f1c3942eed5830d86bdbb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 27 Dec 2016 23:16:06 +0900 Subject: printk: introduce per-cpu safe_print seq buffer This patch extends the idea of NMI per-cpu buffers to regions that may cause recursive printk() calls and possible deadlocks. Namely, printk() can't handle printk calls from schedule code or printk() calls from lock debugging code (spin_dump() for instance); because those may be called with `sem->lock' already taken or any other `critical' locks (p->pi_lock, etc.). An example of deadlock can be vprintk_emit() console_unlock() up() << raw_spin_lock_irqsave(&sem->lock, flags); wake_up_process() try_to_wake_up() ttwu_queue() ttwu_activate() activate_task() enqueue_task() enqueue_task_fair() cfs_rq_of() task_of() WARN_ON_ONCE(!entity_is_task(se)) vprintk_emit() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags) ^^^^ deadlock and some other cases. Just like in NMI implementation, the solution uses a per-cpu `printk_func' pointer to 'redirect' printk() calls to a 'safe' callback, that store messages in a per-cpu buffer and flushes them back to logbuf buffer later. Usage example: printk() printk_safe_enter_irqsave(flags) // // any printk() call from here will endup in vprintk_safe(), // that stores messages in a special per-CPU buffer. // printk_safe_exit_irqrestore(flags) The 'redirection' mechanism, though, has been reworked, as suggested by Petr Mladek. Instead of using a per-cpu @print_func callback we now keep a per-cpu printk-context variable and call either default or nmi vprintk function depending on its value. printk_nmi_entrer/exit and printk_safe_enter/exit, thus, just set/celar corresponding bits in printk-context functions. The patch only adds printk_safe support, we don't use it yet. Link: http://lkml.kernel.org/r/20161227141611.940-4-sergey.senozhatsky@gmail.com Cc: Andrew Morton Cc: Linus Torvalds Cc: Jan Kara Cc: Tejun Heo Cc: Calvin Owens Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Reviewed-by: Steven Rostedt (VMware) --- include/linux/printk.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 37e933eeffb2..571257e0f53d 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_safe_init(void); extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); -extern void printk_safe_flush(void); -extern void printk_safe_flush_on_panic(void); #else -static inline void printk_safe_init(void) { } static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } -static inline void printk_safe_flush(void) { } -static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK @@ -209,6 +203,9 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern void printk_safe_init(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl) static inline void show_regs_print_info(const char *log_lvl) { } + +static inline void printk_safe_init(void) +{ +} + +static inline void printk_safe_flush(void) +{ +} + +static inline void printk_safe_flush_on_panic(void) +{ +} #endif extern asmlinkage void dump_stack(void) __cold; -- cgit v1.2.3 From d3e1b617ae20c459627f501b4bc55b1ea91f662b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 2 Feb 2017 17:41:28 -0800 Subject: i2c: allow specify device properties in i2c_board_info With many drivers converting to using generic device properties, it is useful to provide array of device properties when instantiating new i2c client via i2c_board_info and have them automatically added to the device in question. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Acked-by: Wolfram Sang Signed-off-by: Rafael J. Wysocki --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4b45ec46161f..7b23a3316dcb 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -51,6 +51,7 @@ enum i2c_slave_event; typedef int (*i2c_slave_cb_t)(struct i2c_client *, enum i2c_slave_event, u8 *); struct module; +struct property_entry; #if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) /* @@ -299,6 +300,7 @@ static inline int i2c_slave_event(struct i2c_client *client, * @archdata: copied into i2c_client.dev.archdata * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware + * @properties: additional device properties for the device * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and @@ -320,6 +322,7 @@ struct i2c_board_info { struct dev_archdata *archdata; struct device_node *of_node; struct fwnode_handle *fwnode; + const struct property_entry *properties; int irq; }; -- cgit v1.2.3 From 4f46de9d2eda184b0e49e32bb2a92c6a06b336f5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:50:14 +0000 Subject: irqchip/gic-v3-its: Drop deprecated GITS_BASER_TYPE_CPU During the development of the GICv3/v4 architecture, it was envisaged to have a CPU table, though the use for it was never completely clear (the collection table serves that role pretty well). It ended being dropped before the specification was published, though it lived on in the driver. In order to avoid people scratching their head too much, let's do the same in the kernel. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index e808f8ae6f14..b6e7629f8bb0 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -306,7 +306,7 @@ #define GITS_BASER_TYPE_NONE 0 #define GITS_BASER_TYPE_DEVICE 1 #define GITS_BASER_TYPE_VCPU 2 -#define GITS_BASER_TYPE_CPU 3 +#define GITS_BASER_TYPE_RESERVED3 3 #define GITS_BASER_TYPE_COLLECTION 4 #define GITS_BASER_TYPE_RESERVED5 5 #define GITS_BASER_TYPE_RESERVED6 6 -- cgit v1.2.3 From 6a25ad3a9f9832f24df7987227122b8359f05c8e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2016 15:52:26 +0000 Subject: irqchip/gic-v3-its: Rename MAPVI to MAPTI Back in the days when the GICv3/v4 architecture was drafted, the command to an event to an LPI number was called MAPVI. Later on, and to avoid confusion with the GICv4 command VMAPI, it was renamed MAPTI. We've carried the old name for a long time, but it gets in the way of people reading the code in the light of the public architecture specification. Just repaint all the references and kill the old definition. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b6e7629f8bb0..400ed7ad2bff 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -320,8 +320,6 @@ #define GITS_CMD_MAPD 0x08 #define GITS_CMD_MAPC 0x09 #define GITS_CMD_MAPTI 0x0a -/* older GIC documentation used MAPVI for this command */ -#define GITS_CMD_MAPVI GITS_CMD_MAPTI #define GITS_CMD_MAPI 0x0b #define GITS_CMD_MOVI 0x01 #define GITS_CMD_DISCARD 0x0f -- cgit v1.2.3 From e3c484b183d3bbef0f0df527e10359163f0bb1db Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Wed, 4 Jan 2017 13:54:20 +0530 Subject: irqchip/gic-v3: Remove duplicate definition of GICD_TYPER_LPIS GICD_TYPER_LPIS macro is defined twice in this file. This patch removes the duplicate entry. Fixes: f5c1434c217f ("irqchip: GICv3: rework redistributor structure") Signed-off-by: Alim Akhtar Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 400ed7ad2bff..725e86b506f3 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -73,7 +73,6 @@ #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_TYPER_LPIS (1U << 17) #define GICD_IROUTER_SPI_MODE_ONE (0U << 31) #define GICD_IROUTER_SPI_MODE_ANY (1U << 31) -- cgit v1.2.3 From 3046ec674d441562c6bb3e4284cd866743042ef3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Feb 2017 14:54:12 +0000 Subject: ARM: smccc: Update HVC comment to describe new quirk parameter Commit 680a0873e193 ("arm: kernel: Add SMC structure parameter") added a new "quirk" parameter to the SMC and HVC SMCCC backends, but only updated the comment for the SMC version. This patch adds the new paramater to the comment describing the HVC version too. Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index b67934164401..4c5bca38c653 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -109,6 +109,7 @@ asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, * __arm_smccc_hvc() - make HVC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 + * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required. * * This function is used to make HVC calls following SMC Calling * Convention. The content of the supplied param are copied to registers 0 -- cgit v1.2.3 From 217e6fa24ce28ec87fca8da93c9016cb78028612 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Feb 2017 15:57:20 -0500 Subject: net: introduce device min_header_len The stack must not pass packets to device drivers that are shorter than the minimum link layer header length. Previously, packet sockets would drop packets smaller than or equal to dev->hard_header_len, but this has false positives. Zero length payload is used over Ethernet. Other link layer protocols support variable length headers. Support for validation of these protocols removed the min length check for all protocols. Introduce an explicit dev->min_header_len parameter and drop all packets below this value. Initially, set it to non-zero only for Ethernet and loopback. Other protocols can follow in a patch to net-next. Fixes: 9ed988cd5915 ("packet: validate variable length ll headers") Reported-by: Sowmini Varadhan Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 70ad0291d517..27914672602d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1511,6 +1511,7 @@ enum netdev_priv_flags { * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. + * @min_header_len: Minimum hardware header length * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed @@ -1728,6 +1729,7 @@ struct net_device { unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; + unsigned short min_header_len; unsigned short needed_headroom; unsigned short needed_tailroom; @@ -2694,6 +2696,8 @@ static inline bool dev_validate_header(const struct net_device *dev, { if (likely(len >= dev->hard_header_len)) return true; + if (len < dev->min_header_len) + return false; if (capable(CAP_SYS_RAWIO)) { memset(ll_header + len, 0, dev->hard_header_len - len); -- cgit v1.2.3 From be5e5099183301fb7920f8f6b66bd3ac1f820a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 16 Jan 2017 17:28:18 +0100 Subject: mtd: bcm47xxsflash: use platform_(set|get)_drvdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have generic place & helpers for storing platform driver data so there is no reason for using custom priv pointer. This allows cleaning up struct bcma_sflash from unneeded fields. Signed-off-by: Rafał Miłecki Acked-by: Kalle Valo Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/bcma/bcma_driver_chipcommon.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index b20e3d56253f..2f1c690a3e66 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -593,9 +593,6 @@ struct bcma_sflash { u32 blocksize; u16 numblocks; u32 size; - - struct mtd_info *mtd; - void *priv; }; #endif -- cgit v1.2.3 From 21bdbb7102edeaebb5ec4ef530c8f442f7562c96 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 7 Feb 2017 13:14:04 -0500 Subject: perf: add qcom l2 cache perf events driver Adds perf events support for L2 cache PMU. The L2 cache PMU driver is named 'l2cache_0' and can be used with perf events to profile L2 events such as cache hits and misses on Qualcomm Technologies processors. Reviewed-by: Mark Rutland Signed-off-by: Neil Leeder [will: minimise nesting in l2_cache_associate_cpu_with_cluster] [will: use kstrtoul for unsigned long, remove redunant .owner setting] Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 20bfefbe7594..1b7b2075b9cd 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -138,6 +138,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, + CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit v1.2.3 From cbaf58032efca401834518b905f528ac912449e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:15 -0500 Subject: svcrdma: Another sendto chunk list parsing update Commit 5fdca6531434 ("svcrdma: Renovate sendto chunk list parsing") missed a spot. svc_rdma_xdr_get_reply_hdr_len() also assumes the Write list has only one Write chunk. There's no harm in making this code more general. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 9 +++++++++ include/linux/sunrpc/svc_rdma.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index cfda6adcf33c..245fc59b7324 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -109,6 +109,15 @@ struct rpcrdma_msg { } rm_body; }; +/* + * XDR sizes, in quads + */ +enum { + rpcrdma_fixed_maxsz = 4, + rpcrdma_segment_maxsz = 4, + rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, +}; + /* * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 757fb963696c..551c51816352 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -218,7 +218,7 @@ extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, struct rpcrdma_msg *, struct rpcrdma_msg *, enum rpcrdma_proc); -extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); +extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); -- cgit v1.2.3 From 98fc21d3bfd55a36ce9eb7b32d1ce146f0d1696d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:23 -0500 Subject: svcrdma: Clean up RPC-over-RDMA Reply header encoder Replace C structure-based XDR decoding with pointer arithmetic. Pointer arithmetic is considered more portable, and is used throughout the kernel's existing XDR encoders. The gcc optimizer generates similar assembler code either way. Byte-swapping before a memory store on x86 typically results in an instruction pipeline stall. Avoid byte-swapping when encoding a new header. svcrdma currently doesn't alter a connection's credit grant value after the connection has been accepted, so it is effectively a constant. Cache the byte-swapped value in a separate field. Christoph suggested pulling the header encoding logic into the only function that uses it. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 551c51816352..25ecf92cae96 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -141,7 +141,8 @@ struct svcxprt_rdma { atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ unsigned int sc_rq_depth; /* Depth of RQ */ - u32 sc_max_requests; /* Forward credits */ + __be32 sc_fc_credits; /* Forward credits */ + u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ @@ -214,10 +215,6 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, __be32, __be64, u32); -extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, - struct rpcrdma_msg *, - struct rpcrdma_msg *, - enum rpcrdma_proc); extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ -- cgit v1.2.3 From aba7d14ba18c93a2ab37d50b057a885964ef285c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:48 -0500 Subject: svcrdma: Remove unused sc_dto_q field Clean up. Commit be99bb11400c ("svcrdma: Use new CQ API for RPC-over-RDMA server send CQs") removed code that used the sc_dto_q field, but neglected to remove sc_dto_q at the same time. Fixes: be99bb11400c ("svcrdma: Use new CQ API for RPC-over- ...") Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 25ecf92cae96..f77a7bc1612c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,7 +172,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ struct list_head sc_read_complete_q; struct work_struct sc_work; }; -- cgit v1.2.3 From a3ab867fa64f9aedb3b01d570db5b43d2fc355fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:56 -0500 Subject: svcrdma: Combine list fields in struct svc_rdma_op_ctxt Clean up: The free list and the dto_q list fields are never used at the same time. Reduce the size of struct svc_rdma_op_ctxt by combining these fields. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f77a7bc1612c..b105f73e3ca2 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { - struct list_head free; + struct list_head list; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt { struct ib_cqe cqe; struct ib_cqe reg_cqe; struct ib_cqe inv_cqe; - struct list_head dto_q; u32 byte_len; u32 position; struct svcxprt_rdma *xprt; -- cgit v1.2.3 From 6a2cac549b368960c9cd6a993f2f2cc6d720e935 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 8 Feb 2017 09:31:07 +0100 Subject: net: stmmac: Remove the bus_setup function pointer The bus_setup function pointer is not used at all, this patch remove it. Signed-off-by: Corentin Labbe Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d76033d6726d..fc273e9d5f67 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -134,7 +134,6 @@ struct plat_stmmacenet_data { int tx_fifo_size; int rx_fifo_size; void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; -- cgit v1.2.3 From 34fe7c05400663e01e23cddd1fea68bb7a2b3d29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 14:46:48 +0100 Subject: block: enumify ELEVATOR_*_MERGE Switch these constants to an enum, and make let the compiler ensure that all callers of blk_try_merge and elv_merge handle all potential values. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/elevator.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b5825c4f06f7..b38b4e651ea6 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -9,12 +9,21 @@ struct io_cq; struct elevator_type; -typedef int (elevator_merge_fn) (struct request_queue *, struct request **, +/* + * Return values from elevator merger + */ +enum elv_merge { + ELEVATOR_NO_MERGE = 0, + ELEVATOR_FRONT_MERGE = 1, + ELEVATOR_BACK_MERGE = 2, +}; + +typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, struct bio *); typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int); +typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, struct request *, struct bio *); @@ -87,7 +96,7 @@ struct elevator_mq_ops { bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); - void (*request_merged)(struct request_queue *, struct request *, int); + void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); void (*put_request)(struct request *); @@ -166,10 +175,12 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *); extern void elv_dispatch_add_tail(struct request_queue *, struct request *); extern void elv_add_request(struct request_queue *, struct request *, int); extern void __elv_add_request(struct request_queue *, struct request *, int); -extern int elv_merge(struct request_queue *, struct request **, struct bio *); +extern enum elv_merge elv_merge(struct request_queue *, struct request **, + struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); -extern void elv_merged_request(struct request_queue *, struct request *, int); +extern void elv_merged_request(struct request_queue *, struct request *, + enum elv_merge); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); @@ -218,13 +229,6 @@ extern void elv_rb_add(struct rb_root *, struct request *); extern void elv_rb_del(struct rb_root *, struct request *); extern struct request *elv_rb_find(struct rb_root *, sector_t); -/* - * Return values from elevator merger - */ -#define ELEVATOR_NO_MERGE 0 -#define ELEVATOR_FRONT_MERGE 1 -#define ELEVATOR_BACK_MERGE 2 - /* * Insertion selection */ -- cgit v1.2.3 From 1e739730c5b9ea80a2f25e9cf6e1025d47e3d8ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 14:46:49 +0100 Subject: block: optionally merge discontiguous discard bios into a single request Add a new merge strategy that merges discard bios into a request until the maximum number of discard ranges (or the maximum discard size) is reached from the plug merging code. I/O scheduler merging is not wired up yet but might also be useful, although not for fast devices like NVMe which are the only user for now. Note that for now we don't support limiting the size of each discard range, but if needed that can be added later. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 26 ++++++++++++++++++++++++++ include/linux/elevator.h | 1 + 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0bac14347e6..aecca0e7d9ca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -331,6 +331,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; + unsigned short max_discard_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1146,6 +1147,8 @@ extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); +extern void blk_queue_max_discard_segments(struct request_queue *, + unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1189,6 +1192,15 @@ extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); +/* + * Number of physical segments as sent to the device. + * + * Normally this is the number of discontiguous data segments sent by the + * submitter. But for data-less command like discard we might have no + * actual data segments submitted, but the driver might have to add it's + * own special payload. In that case we still return 1 here so that this + * special payload will be mapped. + */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) @@ -1196,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) return rq->nr_phys_segments; } +/* + * Number of discard segments (or ranges) the driver needs to fill in. + * Each discard bio merged into a request is counted as one segment. + */ +static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) +{ + return max_t(unsigned short, rq->nr_phys_segments, 1); +} + extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); @@ -1384,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q) return q->limits.max_segments; } +static inline unsigned short queue_max_discard_segments(struct request_queue *q) +{ + return q->limits.max_discard_segments; +} + static inline unsigned int queue_max_segment_size(struct request_queue *q) { return q->limits.max_segment_size; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b38b4e651ea6..8265b6330cc2 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -16,6 +16,7 @@ enum elv_merge { ELEVATOR_NO_MERGE = 0, ELEVATOR_FRONT_MERGE = 1, ELEVATOR_BACK_MERGE = 2, + ELEVATOR_DISCARD_MERGE = 3, }; typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, -- cgit v1.2.3 From b35ba01ea6979125e9c23fb322517748278f15e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 14:46:50 +0100 Subject: nvme: support ranged discard requests NVMe supports up to 256 ranges per DSM command, so wire up support for ranged discards up to that limit. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d1c6f1b15c9..3e2ed49c3ad8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -553,6 +553,8 @@ enum { NVME_DSMGMT_AD = 1 << 2, }; +#define NVME_DSM_MAX_RANGES 256 + struct nvme_dsm_range { __le32 cattr; __le32 nlb; -- cgit v1.2.3 From d6fc8821c2d2aba4cc18447a467f543e46e7367d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Feb 2017 09:54:42 +0800 Subject: SUNRPC/Cache: Always treat the invalid cache as unexpired When the first time pynfs runs after rpc/nfsd startup, always get the warning, "Got error: Connection closed" I found the problem is caused by, 1. A new startup of nfsd, rpc.mountd, etc, 2. A rpc request from client (pynfs test, or normal mounting), 3. An ip_map cache is created but invalid, so upcall to rpc.mountd, 4. rpc.mountd process the ip_map upcall, before write the valid data to nfsd, do auth_reload(), and check_useipaddr(), 5. For the first time, old_use_ipaddr = -1, it causes rpc.mountd do write_flush that doing cache_clean, 6. The ip_map cache will be treat as expired and clean, 7. When rpc.mountd write the valid data to nfsd, a new ip_map is created and updated, the cache_check of old ip_map(doing the upcall) will return -ETIMEDOUT. 8. RPC layer return SVC_CLOSE and close the xprt after commit 4d712ef1db05 "svcauth_gss: Close connection when dropping an incoming message" NeilBrown suggest in another email, "If CACHE_VALID is not set, then there is no data in the cache item, so there is nothing to expire. So it would be nice if cache items that don't have CACHE_VALID are never treated as expired." v3, change the order of the two patches v2, change the checking of CACHE_PENDING to CACHE_VALID Reviewed-by: NeilBrown Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 9dcf2c8fe1c5..70738504d647 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time >= h->last_refresh); } -- cgit v1.2.3 From 6080ef6e7c0a0592cbcca11200d879faf65e27d4 Mon Sep 17 00:00:00 2001 From: Jeff Westfahl Date: Tue, 10 Jan 2017 13:30:17 -0600 Subject: mtd: introduce function max_bad_blocks If implemented, 'max_bad_blocks' returns the maximum number of bad blocks to reserve for a MTD. An implementation for NAND is coming soon. Signed-off-by: Jeff Westfahl Signed-off-by: Zach Brown Acked-by: Boris Brezillon Acked-by: Brian Norris Signed-off-by: Brian Norris --- include/linux/mtd/mtd.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 13f8052b9ff9..5bb42c6dacdc 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -322,6 +322,7 @@ struct mtd_info { int (*_block_isreserved) (struct mtd_info *mtd, loff_t ofs); int (*_block_isbad) (struct mtd_info *mtd, loff_t ofs); int (*_block_markbad) (struct mtd_info *mtd, loff_t ofs); + int (*_max_bad_blocks) (struct mtd_info *mtd, loff_t ofs, size_t len); int (*_suspend) (struct mtd_info *mtd); void (*_resume) (struct mtd_info *mtd); void (*_reboot) (struct mtd_info *mtd); @@ -397,6 +398,18 @@ static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; } +static inline int mtd_max_bad_blocks(struct mtd_info *mtd, + loff_t ofs, size_t len) +{ + if (!mtd->_max_bad_blocks) + return -ENOTSUPP; + + if (mtd->size < (len + ofs) || ofs < 0) + return -EINVAL; + + return mtd->_max_bad_blocks(mtd, ofs, len); +} + int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, struct mtd_pairing_info *info); int mtd_pairing_info_to_wunit(struct mtd_info *mtd, -- cgit v1.2.3 From 863d7d9c2e0cbbde6cd15f87c4431dd806f2d917 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:47:16 +0800 Subject: sunrpc/nfs: cleanup procfs/pipefs entry in cache_detail Record flush/channel/content entries is useless, remove them. Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- include/linux/sunrpc/cache.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..bb5c9c80f12e 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -63,15 +63,6 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ -struct cache_detail_procfs { - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; -}; - -struct cache_detail_pipefs { - struct dentry *dir; -}; - struct cache_detail { struct module * owner; int hash_size; @@ -123,9 +114,9 @@ struct cache_detail { time_t last_warn; /* when we last warned about no readers */ union { - struct cache_detail_procfs procfs; - struct cache_detail_pipefs pipefs; - } u; + struct proc_dir_entry *procfs; + struct dentry *pipefs; + }; struct net *net; }; -- cgit v1.2.3 From 5786461bd8ea81433d81297215ee814a9a33ce7a Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:48:11 +0800 Subject: sunrpc: rename NFS_NGROUPS to UNX_NGROUPS for auth unix NFS_NGROUPS has been move to sunrpc, rename to UNX_NGROUPS. Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index b1bc62ba20a2..39c85fb1f0ed 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -32,6 +32,7 @@ */ #define UNX_MAXNODENAME __NEW_UTS_LEN #define UNX_CALLSLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME)) +#define UNX_NGROUPS 16 struct rpcsec_gss_info; -- cgit v1.2.3 From af4926e5619f8a33463f7202f27ba091893ed2ad Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:48:49 +0800 Subject: sunrpc: remove dead codes of cr_magic in rpc_cred Don't found any place using the cr_magic. Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 39c85fb1f0ed..8fd3504946ad 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -64,9 +64,6 @@ struct rpc_cred { struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - unsigned long cr_magic; /* 0x0f4aa4f0 */ -#endif unsigned long cr_expire; /* when to gc */ unsigned long cr_flags; /* various flags */ atomic_t cr_count; /* ref count */ @@ -80,8 +77,6 @@ struct rpc_cred { #define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_NEGATIVE 3 -#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 - /* rpc_auth au_flags */ #define RPCAUTH_AUTH_NO_CRKEY_TIMEOUT 0x0001 /* underlying cred has no key timeout */ -- cgit v1.2.3 From ceb374eb06848212e47fb884964a3ae5e79615b5 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 10 Jan 2017 13:30:19 -0600 Subject: mtd: nand: Add max_bb_per_die and blocks_per_die fields to nand_chip The fields max_bb_per_die and blocks_per_die are useful determining the number of bad blocks a MTD needs to allocate. How they are set will depend on if the chip is ONFI, JEDEC or a full-id entry in the nand_ids table. Signed-off-by: Zach Brown Acked-by: Boris Brezillon Acked-by: Brian Norris Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c5f3a012ae62..4e1b441c3a7e 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -801,6 +801,9 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * supported, 0 otherwise. * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is * supported, 0 otherwise. + * @max_bb_per_die: [INTERN] the max number of bad blocks each die of a + * this nand device will encounter their life times. + * @blocks_per_die: [INTERN] The number of PEBs in a die * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand @@ -883,6 +886,8 @@ struct nand_chip { struct nand_onfi_params onfi_params; struct nand_jedec_params jedec_params; }; + u16 max_bb_per_die; + u32 blocks_per_die; struct nand_data_interface *data_interface; -- cgit v1.2.3 From 0911d0041c22922228ca52a977d7b0b0159fee4b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 14:30:53 -0800 Subject: mm: avoid returning VM_FAULT_RETRY from ->page_mkwrite handlers Some ->page_mkwrite handlers may return VM_FAULT_RETRY as its return code (GFS2 or Lustre can definitely do this). However VM_FAULT_RETRY from ->page_mkwrite is completely unhandled by the mm code and results in locking and writeably mapping the page which definitely is not what the caller wanted. Fix Lustre and block_page_mkwrite_ret() used by other filesystems (notably GFS2) to return VM_FAULT_NOPAGE instead which results in bailing out from the fault code, the CPU then retries the access, and we fault again effectively doing what the handler wanted. Link: http://lkml.kernel.org/r/20170203150729.15863-1-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Al Viro Reviewed-by: Jinshan Xiong Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d67ab83823ad..79591c3660cc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -243,12 +243,10 @@ static inline int block_page_mkwrite_return(int err) { if (err == 0) return VM_FAULT_LOCKED; - if (err == -EFAULT) + if (err == -EFAULT || err == -EAGAIN) return VM_FAULT_NOPAGE; if (err == -ENOMEM) return VM_FAULT_OOM; - if (err == -EAGAIN) - return VM_FAULT_RETRY; /* -ENOSPC, -EDQUOT, -EIO ... */ return VM_FAULT_SIGBUS; } -- cgit v1.2.3 From 4d59b6ccf000862beed6fc0765d3209f98a8d8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Feb 2017 14:30:56 -0800 Subject: cpumask: use nr_cpumask_bits for parsing functions Commit 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions") converted both cpumask printing and parsing functions to use nr_cpu_ids instead of nr_cpumask_bits. While this was okay for the printing functions as it just picked one of the two output formats that we were alternating between depending on a kernel config, doing the same for parsing wasn't okay. nr_cpumask_bits can be either nr_cpu_ids or NR_CPUS. We can always use nr_cpu_ids but that is a variable while NR_CPUS is a constant, so it can be more efficient to use NR_CPUS when we can get away with it. Converting the printing functions to nr_cpu_ids makes sense because it affects how the masks get presented to userspace and doesn't break anything; however, using nr_cpu_ids for parsing functions can incorrectly leave the higher bits uninitialized while reading in these masks from userland. As all testing and comparison functions use nr_cpumask_bits which can be larger than nr_cpu_ids, the parsed cpumasks can erroneously yield false negative results. This made the taskstats interface incorrectly return -EINVAL even when the inputs were correct. Fix it by restoring the parse functions to use nr_cpumask_bits instead of nr_cpu_ids. Link: http://lkml.kernel.org/r/20170206182442.GB31078@htj.duckdns.org Fixes: 513e3d2d11c9 ("cpumask: always use nr_cpu_ids in formatting and parsing functions") Signed-off-by: Tejun Heo Reported-by: Martin Steigerwald Debugged-by: Ben Hutchings Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c717f5ea88cb..b3d2c1a89ac4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -560,7 +560,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -575,7 +575,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), - nr_cpu_ids); + nr_cpumask_bits); } /** @@ -590,7 +590,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) char *nl = strchr(buf, '\n'); unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf); - return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } /** @@ -602,7 +602,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids); + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } /** -- cgit v1.2.3 From 16d0bf1882109da84b4b90d7ba4fc7ba18db0d67 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Feb 2017 19:37:55 +0100 Subject: PM / Domains: Provide dummy governors if CONFIG_PM_GENERIC_DOMAINS=n This allows to compile-test drivers that refer to governors (always by reference) when CONFIG_PM_GENERIC_DOMAINS=n. Signed-off-by: Geert Uytterhoeven Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 81ece61075df..5339ed5bd6f9 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -182,6 +182,9 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) { return -ENOTSUPP; } + +#define simple_qos_governor (*(struct dev_power_governor *)(NULL)) +#define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From 72f2ff0deb870145a5a2d24cd75b4f9936159a62 Mon Sep 17 00:00:00 2001 From: Dongdong Liu Date: Fri, 3 Feb 2017 15:02:07 -0600 Subject: PCI: Disable MSI for HiSilicon Hip06/Hip07 Root Ports The PCIe Root Port in Hip06/Hip07 SoCs advertises an MSI capability, but it cannot generate MSIs. It can transfer MSI/MSI-X from downstream devices, but does not support MSI/MSI-X itself. Add a quirk to prevent use of MSI/MSI-X by the Root Port. [bhelgaas: changelog, sort vendor ID #define, drop device ID #define] Signed-off-by: Dongdong Liu Signed-off-by: Bjorn Helgaas Reviewed-by: Gabriele Paoloni Reviewed-by: Zhou Wang --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73dda0edcb97..a4f77feecbb0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2516,6 +2516,8 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff +#define PCI_VENDOR_ID_HUAWEI 0x19e5 + #define PCI_VENDOR_ID_NETRONOME 0x19ee #define PCI_DEVICE_ID_NETRONOME_NFP3200 0x3200 #define PCI_DEVICE_ID_NETRONOME_NFP3240 0x3240 -- cgit v1.2.3 From e553f539f2af39db5e3b2c273cc1a22d34be49ad Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Mon, 17 Oct 2016 12:17:43 -0700 Subject: of: make of_device_make_bus_id() static of_device_make_bus_id() was changed to non-static by commit c66012253800 ("of/device: Make of_device_make_bus_id() usable by other code") more than 6 years ago, but there are no users of it outside of platform.c. Make the function static again. Signed-off-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of_device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index cc7dd687a89d..5c2aeed17dd4 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -13,7 +13,6 @@ struct device; #ifdef CONFIG_OF extern const struct of_device_id *of_match_device( const struct of_device_id *matches, const struct device *dev); -extern void of_device_make_bus_id(struct device *dev); /** * of_driver_match_device - Tell if a driver's of_match_table matches a device. -- cgit v1.2.3 From d23bb113952db88b03a71c9533e5a40e444e18d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:52 -0500 Subject: SUNRPC: Remove unused function rpc_get_timeout() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 333ad11b3dd9..33f216edb434 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -182,7 +182,6 @@ int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *); -unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); -- cgit v1.2.3 From 7196dbb02ea05835b9ee56910ee82cb55422c7f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:54 -0500 Subject: SUNRPC: Allow changing of the TCP timeout parameters on the fly When the NFSv4 server tells us the lease period, we usually want to adjust down the timeout parameters on the TCP connection to ensure that we don't miss lease renewals due to a faulty connection. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 4 ++++ include/linux/sunrpc/xprtsock.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a5da60b24d83..eab1c749e192 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -137,6 +137,9 @@ struct rpc_xprt_ops { void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); + void (*set_connect_timeout)(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); @@ -221,6 +224,7 @@ struct rpc_xprt { struct timer_list timer; unsigned long last_used, idle_timeout, + connect_timeout, max_reconnect_timeout; /* diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index bef3fb0abb8f..c9959d7e3579 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -55,6 +55,8 @@ struct sock_xprt { size_t rcvsize, sndsize; + struct rpc_timeout tcp_timeout; + /* * Saved socket callback addresses */ @@ -81,6 +83,7 @@ struct sock_xprt { #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) +#define XPRT_SOCK_UPD_TIMEOUT (3) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 26ae102f2cfd0215daa57dc790aa3bfe534403a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:55 -0500 Subject: NFSv4: Set the connection timeout to match the lease period Set the timeout for TCP connections to be 1 lease period to ensure that we don't lose our lease due to a faulty TCP connection. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 33f216edb434..6095ecba0dde 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -201,8 +201,9 @@ int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, struct rpc_xprt *, void *), void *data); -void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, - unsigned long timeo); +void rpc_set_connect_timeout(struct rpc_clnt *clnt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, struct rpc_xprt_switch *, -- cgit v1.2.3 From 90858794c96028ec1294fda12488a19f3a2b1d4a Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 24 Jul 2016 20:51:44 -0400 Subject: module.h: remove extable.h include now users have migrated With hopefully most/all users of module.h that were looking for exception table functions moved over to the new extable.h header, we can remove the back-compat include that let us transition without introducing build regressions. Cc: Rusty Russell Acked-by: Rusty Russell Acked-by: Jessica Yu Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Paul Gortmaker --- include/linux/module.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 7c84273d60b9..2e6df4c41c86 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,7 +18,6 @@ #include #include #include -#include /* only as arch move module.h -> extable.h */ #include #include -- cgit v1.2.3 From 0764c604c8128f17fd740ff8b1701d0a1301eb7e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 3 Feb 2017 11:29:26 -0600 Subject: PM / OPP: Expose _of_get_opp_desc_node as dev_pm_opp API Rename _of_get_opp_desc_node to dev_pm_opp_of_get_opp_desc_node and add it to include/linux/pm_opp.h to allow other drivers, such as platform OPP and cpufreq drivers, to make use of it. Acked-by: Viresh Kumar Signed-off-by: Dave Gerlach Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 731d548657aa..a6685b3dde26 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -288,6 +288,7 @@ void dev_pm_opp_of_remove_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); +struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); #else static inline int dev_pm_opp_of_add_table(struct device *dev) { @@ -311,6 +312,11 @@ static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct { return -ENOTSUPP; } + +static inline struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev) +{ + return NULL; +} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3 From 9faf1c0fd5a943e498dbc0c86ff42e965b347d08 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:15 +0800 Subject: sctp: drop unnecessary __packed from some stream reconf structures commit 85c727b59483 ("sctp: drop __packed from almost all SCTP structures") has removed __packed from almost all SCTP structures. But there still are three structures where it should be dropped. This patch is to remove it from some stream reconf structures. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/sctp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 2408c6877ca0..d74fca3f3141 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -721,7 +721,7 @@ struct sctp_infox { struct sctp_reconf_chunk { sctp_chunkhdr_t chunk_hdr; __u8 params[0]; -} __packed; +}; struct sctp_strreset_outreq { sctp_paramhdr_t param_hdr; @@ -729,12 +729,12 @@ struct sctp_strreset_outreq { __u32 response_seq; __u32 send_reset_at_tsn; __u16 list_of_streams[0]; -} __packed; +}; struct sctp_strreset_inreq { sctp_paramhdr_t param_hdr; __u32 request_seq; __u16 list_of_streams[0]; -} __packed; +}; #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From c56480a1e90261842f54f3a5a9ebc12d827f0c3e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:17 +0800 Subject: sctp: add support for generating stream reconf ssn/tsn reset request chunk This patch is to define SSN/TSN Reset Request Parameter described in rfc6525 section 4.3. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index d74fca3f3141..71c0d41d9a59 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -737,4 +737,9 @@ struct sctp_strreset_inreq { __u16 list_of_streams[0]; }; +struct sctp_strreset_tsnreq { + sctp_paramhdr_t param_hdr; + __u32 request_seq; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From 78098117f8bfad4f2104c3f7b6b69071af95a246 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 9 Feb 2017 01:18:19 +0800 Subject: sctp: add support for generating stream reconf add incoming/outgoing streams request chunk This patch is to define Add Incoming/Outgoing Streams Request Parameter described in rfc6525 section 4.5 and 4.6. They can be in one same chunk trunk as rfc6525 section 3.1-7 describes, so make them in one function. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/sctp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 71c0d41d9a59..b055788de0cf 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -742,4 +742,11 @@ struct sctp_strreset_tsnreq { __u32 request_seq; }; +struct sctp_strreset_addstrm { + sctp_paramhdr_t param_hdr; + __u32 request_seq; + __u16 number_of_streams; + __u16 reserved; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From 2fd260f03b6a365bad48522f3948463928f91c2c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 24 Jan 2017 19:35:56 +0200 Subject: PCI/AER: Remove unused .link_reset() callback No hardware seems to actually call .link_reset(), and no driver implements it as more than a nop stub. Drop mentions of the callback from everywhere. It's dropped from the documentation as well, but the doc really needs to be updated to reflect reality better (e.g., on PCIe, slot reset is the link reset). This will be done in a later patch. Signed-off-by: Michael S. Tsirkin Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e2d1a124216a..2c0158b4dbed 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -678,9 +678,6 @@ struct pci_error_handlers { /* MMIO has been re-enabled, but not DMA */ pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); - /* PCI Express link has been reset */ - pci_ers_result_t (*link_reset)(struct pci_dev *dev); - /* PCI slot has been reset */ pci_ers_result_t (*slot_reset)(struct pci_dev *dev); -- cgit v1.2.3 From 42e9401bd1467d22c4dc4d2c637347b874e6a80b Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 9 Feb 2017 11:50:24 +0100 Subject: mtd: Add partition device node to mtd partition devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user visible change here is that mtd partitions get an of_node link in sysfs. Signed-off-by: Sascha Hauer Signed-off-by: Uwe Kleine-König Signed-off-by: Brian Norris --- include/linux/mtd/partitions.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 70736e1e6c8f..06df1e06b6e0 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -41,6 +41,7 @@ struct mtd_partition { uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ uint32_t mask_flags; /* master MTD flags to mask out for this partition */ + struct device_node *of_node; }; #define MTDPART_OFS_RETAIN (-3) -- cgit v1.2.3 From f405df5de3170c00e5c54f8b7cf4766044a032ba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:06:19 +0100 Subject: refcount_t: Introduce a special purpose refcount type Provide refcount_t, an atomic_t like primitive built just for refcounting. It provides saturation semantics such that overflow becomes impossible and thereby 'spurious' use-after-free is avoided. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 294 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 include/linux/refcount.h (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h new file mode 100644 index 000000000000..600aadf9cca4 --- /dev/null +++ b/include/linux/refcount.h @@ -0,0 +1,294 @@ +#ifndef _LINUX_REFCOUNT_H +#define _LINUX_REFCOUNT_H + +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_REFCOUNT +#define REFCOUNT_WARN(cond, str) WARN_ON(cond) +#define __refcount_check __must_check +#else +#define REFCOUNT_WARN(cond, str) (void)(cond) +#define __refcount_check +#endif + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } + +static inline void refcount_set(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + +static inline __refcount_check +bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (!val) + return false; + + if (unlikely(val == UINT_MAX)) + return true; + + new = val + i; + if (new < val) + new = UINT_MAX; + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +static inline void refcount_add(unsigned int i, refcount_t *r) +{ + REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +static inline __refcount_check +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +static inline void refcount_inc(refcount_t *r) +{ + REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} + +static inline __refcount_check +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + +/* + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ +static inline +void refcount_dec(refcount_t *r) +{ + REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); +} + +/* + * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the + * success thereof. + * + * Like all decrement operations, it provides release memory order and provides + * a control dependency. + * + * It can be used like a try-delete operator; this explicit case is provided + * and not cmpxchg in generic, because that would allow implementing unsafe + * operations. + */ +static inline __refcount_check +bool refcount_dec_if_one(refcount_t *r) +{ + return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; +} + +/* + * No atomic_t counterpart, it decrements unless the value is 1, in which case + * it will return false. + * + * Was often done like: atomic_add_unless(&var, -1, 1) + */ +static inline __refcount_check +bool refcount_dec_not_one(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return true; + + if (val == 1) + return false; + + new = val - 1; + if (new > val) { + REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return true; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return true; +} + +/* + * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail + * to decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + mutex_lock(lock); + if (!refcount_dec_and_test(r)) { + mutex_unlock(lock); + return false; + } + + return true; +} + +/* + * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +static inline __refcount_check +bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock(lock); + if (!refcount_dec_and_test(r)) { + spin_unlock(lock); + return false; + } + + return true; +} + +#endif /* _LINUX_REFCOUNT_H */ -- cgit v1.2.3 From 10383aea2f445bce9b2a2b308def08134b438c8e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Nov 2016 18:06:19 +0100 Subject: kref: Implement 'struct kref' using refcount_t Use the refcount_t 'atomic' type to implement 'struct kref', this makes kref more robust by bringing saturation semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kref.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index 9db9685fed84..f4156f88f557 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -15,17 +15,14 @@ #ifndef _KREF_H_ #define _KREF_H_ -#include -#include -#include -#include #include +#include struct kref { - atomic_t refcount; + refcount_t refcount; }; -#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } +#define KREF_INIT(n) { .refcount = REFCOUNT_INIT(n), } /** * kref_init - initialize object. @@ -33,12 +30,12 @@ struct kref { */ static inline void kref_init(struct kref *kref) { - atomic_set(&kref->refcount, 1); + refcount_set(&kref->refcount, 1); } -static inline int kref_read(const struct kref *kref) +static inline unsigned int kref_read(const struct kref *kref) { - return atomic_read(&kref->refcount); + return refcount_read(&kref->refcount); } /** @@ -47,11 +44,7 @@ static inline int kref_read(const struct kref *kref) */ static inline void kref_get(struct kref *kref) { - /* If refcount was 0 before incrementing then we have a race - * condition when this kref is freeing by some other thread right now. - * In this case one should use kref_get_unless_zero() - */ - WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2); + refcount_inc(&kref->refcount); } /** @@ -75,7 +68,7 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) { WARN_ON(release == NULL); - if (atomic_dec_and_test(&kref->refcount)) { + if (refcount_dec_and_test(&kref->refcount)) { release(kref); return 1; } @@ -88,7 +81,7 @@ static inline int kref_put_mutex(struct kref *kref, { WARN_ON(release == NULL); - if (atomic_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { release(kref); return 1; } @@ -101,7 +94,7 @@ static inline int kref_put_lock(struct kref *kref, { WARN_ON(release == NULL); - if (atomic_dec_and_lock(&kref->refcount, lock)) { + if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; } @@ -126,6 +119,6 @@ static inline int kref_put_lock(struct kref *kref, */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { - return atomic_add_unless(&kref->refcount, 1, 0); + return refcount_inc_not_zero(&kref->refcount); } #endif /* _KREF_H_ */ -- cgit v1.2.3 From 6ce77bfd6cedbff61eabf8837dc0901bb671cc86 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 26 Jan 2017 11:40:57 +0200 Subject: perf/core: Allow kernel filters on CPU events While supporting file-based address filters for CPU events requires some extra context switch handling, kernel address filters are easy, since the kernel mapping is preserved across address spaces. It is also useful as it permits tracing scheduling paths of the kernel. This patch allows setting up kernel filters for CPU events. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Will Deacon Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170126094057.13805-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5c58e93c130c..000fdb211c7d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -482,6 +482,7 @@ struct perf_addr_filter { * @list: list of filters for this event * @lock: spinlock that serializes accesses to the @list and event's * (and its children's) filter generations. + * @nr_file_filters: number of file-based filters * * A child event will use parent's @list (and therefore @lock), so they are * bundled together; see perf_event_addr_filters(). @@ -489,6 +490,7 @@ struct perf_addr_filter { struct perf_addr_filters_head { struct list_head list; raw_spinlock_t lock; + unsigned int nr_file_filters; }; /** -- cgit v1.2.3 From dfb4357da6ddbdf57d583ba64361c9d792b0e0b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 8 Feb 2017 11:26:59 -0800 Subject: time: Remove CONFIG_TIMER_STATS Currently CONFIG_TIMER_STATS exposes process information across namespaces: kernel/time/timer_list.c print_timer(): SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); /proc/timer_list: #11: <0000000000000000>, hrtimer_wakeup, S:01, do_nanosleep, cron/2570 Given that the tracer can give the same information, this patch entirely removes CONFIG_TIMER_STATS. Suggested-by: Thomas Gleixner Signed-off-by: Kees Cook Acked-by: John Stultz Cc: Nicolas Pitre Cc: linux-doc@vger.kernel.org Cc: Lai Jiangshan Cc: Shuah Khan Cc: Xing Gao Cc: Jonathan Corbet Cc: Jessica Frazelle Cc: kernel-hardening@lists.openwall.com Cc: Nicolas Iooss Cc: "Paul E. McKenney" Cc: Petr Mladek Cc: Richard Cochran Cc: Tejun Heo Cc: Michal Marek Cc: Josh Poimboeuf Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Olof Johansson Cc: Andrew Morton Cc: linux-api@vger.kernel.org Cc: Arjan van de Ven Link: http://lkml.kernel.org/r/20170208192659.GA32582@beast Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 11 ----------- include/linux/timer.h | 45 --------------------------------------------- 2 files changed, 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cdab81ba29f8..e52b427223ba 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -88,12 +88,6 @@ enum hrtimer_restart { * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative - * @start_pid: timer statistics field to store the pid of the task which - * started the timer - * @start_site: timer statistics field to store the site where the timer - * was started - * @start_comm: timer statistics field to store the name of the process which - * started the timer * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -104,11 +98,6 @@ struct hrtimer { struct hrtimer_clock_base *base; u8 state; u8 is_rel; -#ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; -#endif }; /** diff --git a/include/linux/timer.h b/include/linux/timer.h index 51d601f192d4..5a209b84fd9e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -20,11 +20,6 @@ struct timer_list { unsigned long data; u32 flags; -#ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; -#endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -197,46 +192,6 @@ extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Timer-statistics info: - */ -#ifdef CONFIG_TIMER_STATS - -extern int timer_stats_active; - -extern void init_timer_stats(void); - -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 flags); - -extern void __timer_stats_timer_set_start_info(struct timer_list *timer, - void *addr); - -static inline void timer_stats_timer_set_start_info(struct timer_list *timer) -{ - if (likely(!timer_stats_active)) - return; - __timer_stats_timer_set_start_info(timer, __builtin_return_address(0)); -} - -static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) -{ - timer->start_site = NULL; -} -#else -static inline void init_timer_stats(void) -{ -} - -static inline void timer_stats_timer_set_start_info(struct timer_list *timer) -{ -} - -static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) -{ -} -#endif - extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -- cgit v1.2.3 From 534766dfef999f7e7349bbd91cd19c1673792af3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 31 Jan 2017 16:58:42 +0100 Subject: iommu: Rename iommu_get_instance() Rename the function to iommu_ops_from_fwnode(), because that is what the function actually does. The new name is much more descriptive about what the function does. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- include/linux/of_iommu.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0ff5111f6959..085e1f0e6c07 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -354,7 +354,7 @@ void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); void iommu_register_instance(struct fwnode_handle *fwnode, const struct iommu_ops *ops); -const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode); +const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); #else /* CONFIG_IOMMU_API */ @@ -590,7 +590,7 @@ static inline void iommu_register_instance(struct fwnode_handle *fwnode, } static inline -const struct iommu_ops *iommu_get_instance(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { return NULL; } diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 6a7fc5051099..66fcbc949899 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -39,7 +39,7 @@ static inline void of_iommu_set_ops(struct device_node *np, static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np) { - return iommu_get_instance(&np->fwnode); + return iommu_ops_from_fwnode(&np->fwnode); } extern struct of_device_id __iommu_of_table; -- cgit v1.2.3 From b0119e870837dcd15a207b4701542ebac5d19b45 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 1 Feb 2017 13:23:08 +0100 Subject: iommu: Introduce new 'struct iommu_device' This struct represents one hardware iommu in the iommu core code. For now it only has the iommu-ops associated with it, but that will be extended soon. The register/unregister interface is also added, as well as making use of it in the Intel and AMD IOMMU drivers. Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 ++ include/linux/iommu.h | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d49e26c6cdc7..99a65a397861 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -440,6 +441,7 @@ struct intel_iommu { struct irq_domain *ir_msi_domain; #endif struct device *iommu_dev; /* IOMMU-sysfs device */ + struct iommu_device iommu; /* IOMMU core code handle */ int node; u32 flags; /* Software defined flags */ }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 085e1f0e6c07..900ddd212364 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -204,6 +204,26 @@ struct iommu_ops { unsigned long pgsize_bitmap; }; +/** + * struct iommu_device - IOMMU core representation of one IOMMU hardware + * instance + * @list: Used by the iommu-core to keep a list of registered iommus + * @ops: iommu-ops for talking to this iommu + */ +struct iommu_device { + struct list_head list; + const struct iommu_ops *ops; +}; + +int iommu_device_register(struct iommu_device *iommu); +void iommu_device_unregister(struct iommu_device *iommu); + +static inline void iommu_device_set_ops(struct iommu_device *iommu, + const struct iommu_ops *ops) +{ + iommu->ops = ops; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -361,6 +381,7 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); struct iommu_ops {}; struct iommu_group {}; struct iommu_fwspec {}; +struct iommu_device {}; static inline bool iommu_present(struct bus_type *bus) { @@ -558,6 +579,20 @@ static inline void iommu_device_destroy(struct device *dev) { } +static inline int iommu_device_register(struct iommu_device *iommu) +{ + return -ENODEV; +} + +static inline void iommu_device_set_ops(struct iommu_device *iommu, + const struct iommu_ops *ops) +{ +} + +static inline void iommu_device_unregister(struct iommu_device *iommu) +{ +} + static inline int iommu_device_link(struct device *dev, struct device *link) { return -EINVAL; -- cgit v1.2.3 From 39ab9555c24110671f8dc671311a26e5c985b592 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 1 Feb 2017 16:56:46 +0100 Subject: iommu: Add sysfs bindings for struct iommu_device There is currently support for iommu sysfs bindings, but those need to be implemented in the IOMMU drivers. Add a more generic version of this by adding a struct device to struct iommu_device and use that for the sysfs bindings. Also convert the AMD and Intel IOMMU driver to make use of it. Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 - include/linux/iommu.h | 33 ++++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 99a65a397861..3ba9b536387b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -440,7 +440,6 @@ struct intel_iommu { struct irq_domain *ir_domain; struct irq_domain *ir_msi_domain; #endif - struct device *iommu_dev; /* IOMMU-sysfs device */ struct iommu_device iommu; /* IOMMU core code handle */ int node; u32 flags; /* Software defined flags */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 900ddd212364..c578ca135bed 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -209,14 +209,21 @@ struct iommu_ops { * instance * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu + * @dev: struct device for sysfs handling */ struct iommu_device { struct list_head list; const struct iommu_ops *ops; + struct device dev; }; int iommu_device_register(struct iommu_device *iommu); void iommu_device_unregister(struct iommu_device *iommu); +int iommu_device_sysfs_add(struct iommu_device *iommu, + struct device *parent, + const struct attribute_group **groups, + const char *fmt, ...) __printf(4, 5); +void iommu_device_sysfs_remove(struct iommu_device *iommu); static inline void iommu_device_set_ops(struct iommu_device *iommu, const struct iommu_ops *ops) @@ -287,10 +294,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); -struct device *iommu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, - const char *fmt, ...) __printf(4, 5); -void iommu_device_destroy(struct device *dev); int iommu_device_link(struct device *dev, struct device *link); void iommu_device_unlink(struct device *dev, struct device *link); @@ -567,29 +570,29 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain, return -EINVAL; } -static inline struct device *iommu_device_create(struct device *parent, - void *drvdata, - const struct attribute_group **groups, - const char *fmt, ...) +static inline int iommu_device_register(struct iommu_device *iommu) { - return ERR_PTR(-ENODEV); + return -ENODEV; } -static inline void iommu_device_destroy(struct device *dev) +static inline void iommu_device_set_ops(struct iommu_device *iommu, + const struct iommu_ops *ops) { } -static inline int iommu_device_register(struct iommu_device *iommu) +static inline void iommu_device_unregister(struct iommu_device *iommu) { - return -ENODEV; } -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) +static inline int iommu_device_sysfs_add(struct iommu_device *iommu, + struct device *parent, + const struct attribute_group **groups, + const char *fmt, ...) { + return -ENODEV; } -static inline void iommu_device_unregister(struct iommu_device *iommu) +static inline void iommu_device_sysfs_remove(struct iommu_device *iommu) { } -- cgit v1.2.3 From e3d10af1128b6bc394f21656ff13753130f3c107 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 1 Feb 2017 17:23:22 +0100 Subject: iommu: Make iommu_device_link/unlink take a struct iommu_device This makes the interface more consistent with iommu_device_sysfs_add/remove. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index c578ca135bed..bae3cfc8b4a3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -224,6 +224,8 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, const struct attribute_group **groups, const char *fmt, ...) __printf(4, 5); void iommu_device_sysfs_remove(struct iommu_device *iommu); +int iommu_device_link(struct iommu_device *iommu, struct device *link); +void iommu_device_unlink(struct iommu_device *iommu, struct device *link); static inline void iommu_device_set_ops(struct iommu_device *iommu, const struct iommu_ops *ops) @@ -294,8 +296,6 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); -int iommu_device_link(struct device *dev, struct device *link); -void iommu_device_unlink(struct device *dev, struct device *link); /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, -- cgit v1.2.3 From c73e1ac8b2bc6ab18d9f9a96b17ee7388b49a0c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 7 Feb 2017 18:18:46 +0100 Subject: iommu: Add iommu_device_set_fwnode() interface Allow to store a fwnode in 'struct iommu_device'; Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bae3cfc8b4a3..626c935edee1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -214,6 +214,7 @@ struct iommu_ops { struct iommu_device { struct list_head list; const struct iommu_ops *ops; + struct fwnode_handle *fwnode; struct device dev; }; @@ -233,6 +234,12 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu, iommu->ops = ops; } +static inline void iommu_device_set_fwnode(struct iommu_device *iommu, + struct fwnode_handle *fwnode) +{ + iommu->fwnode = fwnode; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -580,6 +587,11 @@ static inline void iommu_device_set_ops(struct iommu_device *iommu, { } +static inline void iommu_device_set_fwnode(struct iommu_device *iommu, + struct fwnode_handle *fwnode) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } -- cgit v1.2.3 From e99ca98f1d7190c16601b00d0c96212d7c00577d Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Fri, 2 Dec 2016 12:31:44 +0100 Subject: mtd: spi-nor: Add support for S3AN spi-nor devices Xilinx Spartan-3AN FPGAs contain an In-System Flash where they keep their configuration data and (optionally) some user data. The protocol of this flash follows most of the spi-nor standard. With the following differences: - Page size might not be a power of two. - The address calculation (default addressing mode). - The spi nor commands used. Protocol is described on Xilinx User Guide UG333 Signed-off-by: Ricardo Ribalda Delgado Cc: Boris Brezillon Cc: Brian Norris Cc: Marek Vasut Reviewed-by: Marek Vasut Signed-off-by: Cyrille Pitchen --- include/linux/mtd/spi-nor.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index c425c7b4c2a0..4950b2ef08c0 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -68,6 +68,15 @@ #define SPINOR_OP_WRDI 0x04 /* Write disable */ #define SPINOR_OP_AAI_WP 0xad /* Auto address increment word program */ +/* Used for S3AN flashes only */ +#define SPINOR_OP_XSE 0x50 /* Sector erase */ +#define SPINOR_OP_XPP 0x82 /* Page program */ +#define SPINOR_OP_XRDSR 0xd7 /* Read status register */ + +#define XSR_PAGESIZE BIT(0) /* Page size in Po2 or Linear */ +#define XSR_RDY BIT(7) /* Ready */ + + /* Used for Macronix and Winbond flashes. */ #define SPINOR_OP_EN4B 0xb7 /* Enter 4-byte mode */ #define SPINOR_OP_EX4B 0xe9 /* Exit 4-byte mode */ @@ -119,6 +128,9 @@ enum spi_nor_ops { enum spi_nor_option_flags { SNOR_F_USE_FSR = BIT(0), SNOR_F_HAS_SR_TB = BIT(1), + SNOR_F_NO_OP_CHIP_ERASE = BIT(2), + SNOR_F_S3AN_ADDR_DEFAULT = BIT(3), + SNOR_F_READY_XSR_RDY = BIT(4), }; /** -- cgit v1.2.3 From 902cc69a0820252c84c6f7caed350882cea166ba Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 27 Oct 2016 11:55:39 +0200 Subject: mtd: spi-nor: rename SPINOR_OP_* macros of the 4-byte address op codes This patch renames the SPINOR_OP_* macros of the 4-byte address instruction set so the new names all share a common pattern: the 4-byte address name is built from the 3-byte address name appending the "_4B" suffix. The patch also introduces new op codes to support other SPI protocols such as SPI 1-4-4 and SPI 1-2-2. This is a transitional patch and will help a later patch of spi-nor.c to automate the translation from the 3-byte address op codes into their 4-byte address version. Signed-off-by: Cyrille Pitchen Acked-by: Mark Brown Acked-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 4950b2ef08c0..f2a718030476 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -43,9 +43,13 @@ #define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */ #define SPINOR_OP_READ 0x03 /* Read data bytes (low frequency) */ #define SPINOR_OP_READ_FAST 0x0b /* Read data bytes (high frequency) */ -#define SPINOR_OP_READ_1_1_2 0x3b /* Read data bytes (Dual SPI) */ -#define SPINOR_OP_READ_1_1_4 0x6b /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_READ_1_1_2 0x3b /* Read data bytes (Dual Output SPI) */ +#define SPINOR_OP_READ_1_2_2 0xbb /* Read data bytes (Dual I/O SPI) */ +#define SPINOR_OP_READ_1_1_4 0x6b /* Read data bytes (Quad Output SPI) */ +#define SPINOR_OP_READ_1_4_4 0xeb /* Read data bytes (Quad I/O SPI) */ #define SPINOR_OP_PP 0x02 /* Page program (up to 256 bytes) */ +#define SPINOR_OP_PP_1_1_4 0x32 /* Quad page program */ +#define SPINOR_OP_PP_1_4_4 0x38 /* Quad page program */ #define SPINOR_OP_BE_4K 0x20 /* Erase 4KiB block */ #define SPINOR_OP_BE_4K_PMC 0xd7 /* Erase 4KiB block on PMC chips */ #define SPINOR_OP_BE_32K 0x52 /* Erase 32KiB block */ @@ -56,11 +60,17 @@ #define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ -#define SPINOR_OP_READ4 0x13 /* Read data bytes (low frequency) */ -#define SPINOR_OP_READ4_FAST 0x0c /* Read data bytes (high frequency) */ -#define SPINOR_OP_READ4_1_1_2 0x3c /* Read data bytes (Dual SPI) */ -#define SPINOR_OP_READ4_1_1_4 0x6c /* Read data bytes (Quad SPI) */ +#define SPINOR_OP_READ_4B 0x13 /* Read data bytes (low frequency) */ +#define SPINOR_OP_READ_FAST_4B 0x0c /* Read data bytes (high frequency) */ +#define SPINOR_OP_READ_1_1_2_4B 0x3c /* Read data bytes (Dual Output SPI) */ +#define SPINOR_OP_READ_1_2_2_4B 0xbc /* Read data bytes (Dual I/O SPI) */ +#define SPINOR_OP_READ_1_1_4_4B 0x6c /* Read data bytes (Quad Output SPI) */ +#define SPINOR_OP_READ_1_4_4_4B 0xec /* Read data bytes (Quad I/O SPI) */ #define SPINOR_OP_PP_4B 0x12 /* Page program (up to 256 bytes) */ +#define SPINOR_OP_PP_1_1_4_4B 0x34 /* Quad page program */ +#define SPINOR_OP_PP_1_4_4_4B 0x3e /* Quad page program */ +#define SPINOR_OP_BE_4K_4B 0x21 /* Erase 4KiB block */ +#define SPINOR_OP_BE_32K_4B 0x5c /* Erase 32KiB block */ #define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ /* Used for SST flashes only. */ -- cgit v1.2.3 From 2b5e77308f3356faad640b24af6dc5aa7233eb2d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 10 Feb 2017 13:23:23 +0100 Subject: irqdesc: Add a resource managed version of irq_alloc_descs() Add a devres flavor of __devm_irq_alloc_descs() and corresponding helper macros. Signed-off-by: Bartosz Golaszewski Cc: Marc Zyngier Cc: linux-doc@vger.kernel.org Cc: Jonathan Corbet Link: http://lkml.kernel.org/r/1486729403-21132-1-git-send-email-bgolaszewski@baylibre.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index e79875574b39..d915caecafa1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -715,6 +715,10 @@ unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, struct module *owner, const struct cpumask *affinity); +int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, + unsigned int cnt, int node, struct module *owner, + const struct cpumask *affinity); + /* use macros to avoid needing export.h for THIS_MODULE */ #define irq_alloc_descs(irq, from, cnt, node) \ __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) @@ -731,6 +735,21 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, #define irq_alloc_descs_from(from, cnt, node) \ irq_alloc_descs(-1, from, cnt, node) +#define devm_irq_alloc_descs(dev, irq, from, cnt, node) \ + __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) + +#define devm_irq_alloc_desc(dev, node) \ + devm_irq_alloc_descs(dev, -1, 0, 1, node) + +#define devm_irq_alloc_desc_at(dev, at, node) \ + devm_irq_alloc_descs(dev, at, at, 1, node) + +#define devm_irq_alloc_desc_from(dev, from, node) \ + devm_irq_alloc_descs(dev, -1, from, 1, node) + +#define devm_irq_alloc_descs_from(dev, from, cnt, node) \ + devm_irq_alloc_descs(dev, -1, from, cnt, node) + void irq_free_descs(unsigned int irq, unsigned int cnt); static inline void irq_free_desc(unsigned int irq) { -- cgit v1.2.3 From d0f6f5832603931b0a8da044fb9abe8289e201ee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 2 Feb 2017 12:19:12 +0100 Subject: iommu: Remove iommu_register_instance interface And also move its remaining functionality to iommu_device_register() and 'struct iommu_device'. Cc: Rob Herring Cc: Frank Rowand Cc: Matthias Brugger Cc: Marek Szyprowski Cc: devicetree@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 7 ------- include/linux/of_iommu.h | 6 ------ 2 files changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 626c935edee1..9e82fc83765e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -382,8 +382,6 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); -void iommu_register_instance(struct fwnode_handle *fwnode, - const struct iommu_ops *ops); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); #else /* CONFIG_IOMMU_API */ @@ -634,11 +632,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline void iommu_register_instance(struct fwnode_handle *fwnode, - const struct iommu_ops *ops) -{ -} - static inline const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 66fcbc949899..fc4add39361a 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -31,12 +31,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, #endif /* CONFIG_OF_IOMMU */ -static inline void of_iommu_set_ops(struct device_node *np, - const struct iommu_ops *ops) -{ - iommu_register_instance(&np->fwnode, ops); -} - static inline const struct iommu_ops *of_iommu_get_ops(struct device_node *np) { return iommu_ops_from_fwnode(&np->fwnode); -- cgit v1.2.3 From 0508ad1fff11a8b0acdf0333b5fe108d7bd5fce4 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 1 Feb 2017 10:53:41 -0600 Subject: drivers/fsi: Add empty fsi bus definitions This change adds the initial (empty) fsi bus definition, and introduces drivers/fsi/. Signed-off-by: Jeremy Kerr Signed-off-by: Chris Bostic Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/fsi.h (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h new file mode 100644 index 000000000000..47aa181b6404 --- /dev/null +++ b/include/linux/fsi.h @@ -0,0 +1,22 @@ +/* FSI device & driver interfaces + * + * Copyright (C) IBM Corporation 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef LINUX_FSI_H +#define LINUX_FSI_H + +#include + +extern struct bus_type fsi_bus_type; + +#endif /* LINUX_FSI_H */ -- cgit v1.2.3 From fda07a6c94ac5c9bd73d7b0134c6cc6861375341 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 1 Feb 2017 10:53:42 -0600 Subject: drivers/fsi: Add device & driver definitions Add structs for fsi devices & drivers, and struct device conversion functions. Signed-off-by: Jeremy Kerr Signed-off-by: Chris Bostic Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 47aa181b6404..f73886a5af01 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -17,6 +17,17 @@ #include +struct fsi_device { + struct device dev; +}; + +struct fsi_driver { + struct device_driver drv; +}; + +#define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev) +#define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv) + extern struct bus_type fsi_bus_type; #endif /* LINUX_FSI_H */ -- cgit v1.2.3 From dd37eed7db0f7607fa41887c11c1e428faa15d0c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 1 Feb 2017 10:53:43 -0600 Subject: drivers/fsi: add driver to device matches Driver bind to devices based on the engine types & (optional) versions. Signed-off-by: Jeremy Kerr Signed-off-by: Chris Bostic Signed-off-by: Greg Kroah-Hartman --- include/linux/fsi.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsi.h b/include/linux/fsi.h index f73886a5af01..273cbf6400ea 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -18,11 +18,28 @@ #include struct fsi_device { - struct device dev; + struct device dev; + u8 engine_type; + u8 version; }; +struct fsi_device_id { + u8 engine_type; + u8 version; +}; + +#define FSI_VERSION_ANY 0 + +#define FSI_DEVICE(t) \ + .engine_type = (t), .version = FSI_VERSION_ANY, + +#define FSI_DEVICE_VERSIONED(t, v) \ + .engine_type = (t), .version = (v), + + struct fsi_driver { - struct device_driver drv; + struct device_driver drv; + const struct fsi_device_id *id_table; }; #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev) -- cgit v1.2.3 From baa6d396635129d8a67793e884f3b2182c7354b3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Feb 2017 12:48:44 -0700 Subject: fpga: Add scatterlist based programming Requiring contiguous kernel memory is not a good idea, this is a limited resource and allocation can fail under normal work loads. This introduces a .write_sg op that supporting drivers can provide to DMA directly from dis-contiguous memory and a new entry point fpga_mgr_buf_load_sg that users can call to directly provide page lists. The full matrix of compatibility is provided, either the linear or sg interface can be used by the user with a driver supporting either interface. A notable change for drivers is that the .write op can now be called multiple times. Signed-off-by: Jason Gunthorpe Acked-by: Alan Tull Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 16551d5eac36..57beb5d09bfc 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -22,6 +22,7 @@ #define _LINUX_FPGA_MGR_H struct fpga_manager; +struct sg_table; /** * enum fpga_mgr_states - fpga framework states @@ -88,6 +89,7 @@ struct fpga_image_info { * @state: returns an enum value of the FPGA's state * @write_init: prepare the FPGA to receive confuration data * @write: write count bytes of configuration data to the FPGA + * @write_sg: write the scatter list of configuration data to the FPGA * @write_complete: set FPGA to operating state after writing is done * @fpga_remove: optional: Set FPGA into a specific state during driver remove * @@ -102,6 +104,7 @@ struct fpga_manager_ops { struct fpga_image_info *info, const char *buf, size_t count); int (*write)(struct fpga_manager *mgr, const char *buf, size_t count); + int (*write_sg)(struct fpga_manager *mgr, struct sg_table *sgt); int (*write_complete)(struct fpga_manager *mgr, struct fpga_image_info *info); void (*fpga_remove)(struct fpga_manager *mgr); @@ -129,6 +132,8 @@ struct fpga_manager { int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count); +int fpga_mgr_buf_load_sg(struct fpga_manager *mgr, struct fpga_image_info *info, + struct sg_table *sgt); int fpga_mgr_firmware_load(struct fpga_manager *mgr, struct fpga_image_info *info, -- cgit v1.2.3 From f6c4391553573d592212e6624cfba5e2752cd5c8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 5 Feb 2017 17:20:33 -0700 Subject: vmbus: remove no longer used signal_policy The explicit signal policy is no longer used. A different mechanism will be added later when xmit_more is supported. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c37a4a145036..7795966af0f9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -684,11 +684,6 @@ struct hv_input_signal_event_buffer { struct hv_input_signal_event event; }; -enum hv_signal_policy { - HV_SIGNAL_POLICY_DEFAULT = 0, - HV_SIGNAL_POLICY_EXPLICIT, -}; - enum hv_numa_policy { HV_BALANCED = 0, HV_LOCALIZED, @@ -850,13 +845,6 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; - /* - * Host signaling policy: The default policy will be - * based on the ring buffer state. We will also support - * a policy where the client driver can have explicit - * signaling control. - */ - enum hv_signal_policy signal_policy; /* * On the channel send side, many of the VMBUS * device drivers explicity serialize access to the @@ -918,12 +906,6 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c) VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); } -static inline void set_channel_signal_state(struct vmbus_channel *c, - enum hv_signal_policy policy) -{ - c->signal_policy = policy; -} - static inline void set_channel_affinity_state(struct vmbus_channel *c, enum hv_numa_policy policy) { -- cgit v1.2.3 From 3454323c954775098871559b5c23d877c3e23f77 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 5 Feb 2017 17:20:34 -0700 Subject: vmbus: remove unused kickq argument to sendpacket Since sendpacket no longer uses kickq argument remove it. Remove it no longer used xmit_more in sendpacket in netvsc as well. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 7795966af0f9..e208e6437f5b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1037,8 +1037,7 @@ extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel, u32 bufferLen, u64 requestid, enum vmbus_packet_type type, - u32 flags, - bool kick_q); + u32 flags); extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, struct hv_page_buffer pagebuffers[], @@ -1053,8 +1052,7 @@ extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, void *buffer, u32 bufferlen, u64 requestid, - u32 flags, - bool kick_q); + u32 flags); extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct hv_multipage_buffer *mpb, -- cgit v1.2.3 From f1ba82616c3368e1ae9e64ef29cf3edc1be0860d Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 7 Feb 2017 18:24:43 +0100 Subject: blk-mq: pass bio to blk_mq_sched_get_rq_priv bio is used in bfq-mq's get_rq_priv, to get the request group. We could pass directly the group here, but I thought that passing the bio was more general, giving the possibility to get other pieces of information if needed. Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 8265b6330cc2..aebecc4ed088 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -109,7 +109,7 @@ struct elevator_mq_ops { void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); struct request *(*next_request)(struct request_queue *, struct request *); - int (*get_rq_priv)(struct request_queue *, struct request *); + int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *); void (*put_rq_priv)(struct request_queue *, struct request *); void (*init_icq)(struct io_cq *); void (*exit_icq)(struct io_cq *); -- cgit v1.2.3 From ff548773106ec7f8031bc6172e0234bd2a02c19c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Feb 2017 16:34:07 +0000 Subject: afs: Move UUID struct to linux/uuid.h Move the afs_uuid struct to linux/uuid.h, rename it to uuid_v1 and change the u16/u32 fields to __be16/__be32 instead so that the structure can be cast to a 16-octet network-order buffer. Signed-off-by: David Howells Reviewed-by: Arnd Bergmann +/* + * V1 (time-based) UUID definition [RFC 4122]. + * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns + * increments since midnight 15th October 1582 + * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID + * time + * - the clock sequence is a 14-bit counter to avoid duplicate times + */ +struct uuid_v1 { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ +#define UUID_TO_UNIX_TIME 0x01b21dd213814000ULL +#define UUID_TIMEHI_MASK 0x0fff +#define UUID_VERSION_TIME 0x1000 /* time-based UUID */ +#define UUID_VERSION_NAME 0x3000 /* name-based UUID */ +#define UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */ + u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ +#define UUID_CLOCKHI_MASK 0x3f +#define UUID_VARIANT_STD 0x80 + u8 clock_seq_low; /* clock seq low */ + u8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. -- cgit v1.2.3 From 28309572aac4c632666053dc8bf9906a3594b8d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Thu, 9 Feb 2017 10:21:07 +0100 Subject: mtd: name the mtd device with an optional label property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be used to easily identify a specific chip on a system with multiple chips. Suggested-by: Boris Brezillon Signed-off-by: Cédric Le Goater Reviewed-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/mtd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 5bb42c6dacdc..eebdc63cf6af 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -386,6 +387,8 @@ static inline void mtd_set_of_node(struct mtd_info *mtd, struct device_node *np) { mtd->dev.of_node = np; + if (!mtd->name) + of_property_read_string(np, "label", &mtd->name); } static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) -- cgit v1.2.3 From ea6da4fd388a1eeab30d64da3eab3c5338714c74 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 7 Feb 2017 09:56:06 +0200 Subject: net/skbuff: Introduce skb_mac_offset() Introduce skb_mac_offset() that could be used to get mac header offset. Signed-off-by: Amir Vadai Reviewed-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f1adddc1c5ac..69ccd2636911 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2184,6 +2184,11 @@ static inline unsigned char *skb_mac_header(const struct sk_buff *skb) return skb->head + skb->mac_header; } +static inline int skb_mac_offset(const struct sk_buff *skb) +{ + return skb_mac_header(skb) - skb->data; +} + static inline int skb_mac_header_was_set(const struct sk_buff *skb) { return skb->mac_header != (typeof(skb->mac_header))~0U; -- cgit v1.2.3 From 47feb41888bc82ba5c9268c344775adc483d6de7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 18:17:43 +0100 Subject: PCI/MSI: Remove unused pci_msi_create_default_irq_domain() pci_msi_create_default_irq_domain() is never called in the whole tree, so remove it as well as all the supporting code for a default PCI MSI domain. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner --- include/linux/msi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 0db320b7bb15..18b8566b3ce3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -319,9 +319,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, int nvec, int type); void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev); -struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnode, - struct msi_domain_info *info, struct irq_domain *parent); - irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, -- cgit v1.2.3 From 699c4cec238731a4c466f73fe6e9e45ab6f49a41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 18:17:44 +0100 Subject: PCI/MSI: Remove pci_msi_domain_{alloc,free}_irqs() Just call the msi_* version directly instead of having trivial wrappers for one or two callsites. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner --- include/linux/msi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 18b8566b3ce3..1b6f3ebbe876 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -316,9 +316,6 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, - int nvec, int type); -void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev); irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, -- cgit v1.2.3 From 1697599ee301a52cded6499a09bd609f7f63fd06 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Feb 2017 09:17:27 -0800 Subject: bitfield.h: add FIELD_FIT() helper Add a helper for checking at runtime that a value will fit inside a specified field/mask. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/bitfield.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index f6505d83069d..8b9d6fff002d 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -62,6 +62,19 @@ (1ULL << __bf_shf(_mask))); \ }) +/** + * FIELD_FIT() - check if value fits in the field + * @_mask: shifted mask defining the field's length and position + * @_val: value to test against the field + * + * Return: true if @_val can fit inside @_mask, false if @_val is too big. + */ +#define FIELD_FIT(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ + }) + /** * FIELD_PREP() - prepare a bitfield element * @_mask: shifted mask defining the field's length and position -- cgit v1.2.3 From a8e04698732736f59fefe72c675791a006b76e1d Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:46 -0800 Subject: tap: Refactoring macvtap.c macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_macvtap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/if_macvtap.h (limited to 'include/linux') diff --git a/include/linux/if_macvtap.h b/include/linux/if_macvtap.h new file mode 100644 index 000000000000..c9bf84b75b27 --- /dev/null +++ b/include/linux/if_macvtap.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_IF_MACVTAP_H_ +#define _LINUX_IF_MACVTAP_H_ + +rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb); +void macvtap_del_queues(struct net_device *dev); +int macvtap_get_minor(struct macvlan_dev *vlan); +void macvtap_free_minor(struct macvlan_dev *vlan); +int macvtap_queue_resize(struct macvlan_dev *vlan); + +#endif /*_LINUX_IF_MACVTAP_H_*/ -- cgit v1.2.3 From 635b8c8ecdd27142d7fdab0df334b2e9201481cf Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:47 -0800 Subject: tap: Renaming tap related APIs, data structures, macros Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 17 ++--------------- include/linux/if_macvtap.h | 10 ---------- include/linux/if_tap.h | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 25 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a4ccc3122f93..c9ec1343d187 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -9,19 +9,6 @@ #include #include -#if IS_ENABLED(CONFIG_MACVTAP) -struct socket *macvtap_get_socket(struct file *); -#else -#include -#include -struct file; -struct socket; -static inline struct socket *macvtap_get_socket(struct file *f) -{ - return ERR_PTR(-EINVAL); -} -#endif /* CONFIG_MACVTAP */ - struct macvlan_port; struct macvtap_queue; @@ -29,7 +16,7 @@ struct macvtap_queue; * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. */ -#define MAX_MACVTAP_QUEUES 256 +#define MAX_TAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) @@ -49,7 +36,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; /* This array tracks active taps. */ - struct macvtap_queue __rcu *taps[MAX_MACVTAP_QUEUES]; + struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; /* This list tracks all taps (both enabled and disabled) */ struct list_head queue_list; int numvtaps; diff --git a/include/linux/if_macvtap.h b/include/linux/if_macvtap.h deleted file mode 100644 index c9bf84b75b27..000000000000 --- a/include/linux/if_macvtap.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_IF_MACVTAP_H_ -#define _LINUX_IF_MACVTAP_H_ - -rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb); -void macvtap_del_queues(struct net_device *dev); -int macvtap_get_minor(struct macvlan_dev *vlan); -void macvtap_free_minor(struct macvlan_dev *vlan); -int macvtap_queue_resize(struct macvlan_dev *vlan); - -#endif /*_LINUX_IF_MACVTAP_H_*/ diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h new file mode 100644 index 000000000000..97d27b8ebd55 --- /dev/null +++ b/include/linux/if_tap.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_IF_TAP_H_ +#define _LINUX_IF_TAP_H_ + +#if IS_ENABLED(CONFIG_MACVTAP) +struct socket *tap_get_socket(struct file *); +#else +#include +#include +struct file; +struct socket; +static inline struct socket *tap_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_MACVTAP */ + +rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); +void tap_del_queues(struct net_device *dev); +int tap_get_minor(struct macvlan_dev *vlan); +void tap_free_minor(struct macvlan_dev *vlan); +int tap_queue_resize(struct macvlan_dev *vlan); + +#endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3 From ebc05ba7e8600b52a2a0c87a43105143368aca2a Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:48 -0800 Subject: tap: Tap character device creation/destroy API This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_tap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 97d27b8ebd55..a2dfd9063a6c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -19,5 +19,8 @@ void tap_del_queues(struct net_device *dev); int tap_get_minor(struct macvlan_dev *vlan); void tap_free_minor(struct macvlan_dev *vlan); int tap_queue_resize(struct macvlan_dev *vlan); +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name); +void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); #endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3 From 6fe3faf86757eb7f078ff06b23b206f17dc4fb36 Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:49 -0800 Subject: tap: Abstract type of virtual interface from tap implementation macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. Few of the tap functions are modified to accepted tap_dev as argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_tap.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index a2dfd9063a6c..75031e5d0a65 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -14,11 +14,60 @@ static inline struct socket *tap_get_socket(struct file *f) } #endif /* CONFIG_MACVTAP */ +#include +#include + +#define MAX_TAP_QUEUES 256 + +struct tap_queue; + +struct tap_dev { + struct net_device *dev; + u16 flags; + /* This array tracks active taps. */ + struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; + /* This list tracks all taps (both enabled and disabled) */ + struct list_head queue_list; + int numvtaps; + int numqueues; + netdev_features_t tap_features; + int minor; + + void (*update_features)(struct tap_dev *tap, netdev_features_t features); + void (*count_tx_dropped)(struct tap_dev *tap); + void (*count_rx_dropped)(struct tap_dev *tap); +}; + +/* + * A tap queue is the central object of tap module, it connects + * an open character device to virtual interface. There can be + * multiple queues on one interface, which map back to queues + * implemented in hardware on the underlying device. + * + * tap_proto is used to allocate queues through the sock allocation + * mechanism. + * + */ + +struct tap_queue { + struct sock sk; + struct socket sock; + struct socket_wq wq; + int vnet_hdr_sz; + struct tap_dev __rcu *tap; + struct file *file; + unsigned int flags; + u16 queue_index; + bool enabled; + struct list_head next; + struct skb_array skb_array; +}; + rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); -void tap_del_queues(struct net_device *dev); -int tap_get_minor(struct macvlan_dev *vlan); -void tap_free_minor(struct macvlan_dev *vlan); -int tap_queue_resize(struct macvlan_dev *vlan); +void tap_del_queues(struct tap_dev *tap); +int tap_get_minor(struct tap_dev *tap); +void tap_free_minor(struct tap_dev *tap); +int tap_queue_resize(struct tap_dev *tap); int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); -- cgit v1.2.3 From d9f1f61c0801a73ff36d416a7ede54229b231e1d Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:50 -0800 Subject: tap: Extending tap device create/destroy APIs Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_tap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 75031e5d0a65..362e71c16efb 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -65,8 +65,8 @@ struct tap_queue { rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); void tap_del_queues(struct tap_dev *tap); -int tap_get_minor(struct tap_dev *tap); -void tap_free_minor(struct tap_dev *tap); +int tap_get_minor(dev_t major, struct tap_dev *tap); +void tap_free_minor(dev_t major, struct tap_dev *tap); int tap_queue_resize(struct tap_dev *tap); int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name); -- cgit v1.2.3 From 9a393b5d5988ea4eaa3e0da138321abe0dc03a68 Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:51 -0800 Subject: tap: tap as an independent module This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- include/linux/if_tap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 362e71c16efb..3482c3c2037d 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -1,7 +1,7 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ -#if IS_ENABLED(CONFIG_MACVTAP) +#if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); #else #include @@ -12,7 +12,7 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } -#endif /* CONFIG_MACVTAP */ +#endif /* CONFIG_TAP */ #include #include -- cgit v1.2.3 From 8c4d4e8b5626fec965fd5034e5bd5e57790f243f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Feb 2017 12:08:17 +0100 Subject: netfilter: nfnetlink: allow to check for generation ID This patch allows userspace to specify the generation ID that has been used to build an incremental batch update. If userspace specifies the generation ID in the batch message as attribute, then nfnetlink compares it to the current generation ID so you make sure that you work against the right baseline. Otherwise, bail out with ERESTART so userspace knows that its changeset is stale and needs to respin. Userspace can do this transparently at the cost of taking slightly more time to refresh caches and rework the changeset. This check is optional, if there is no NFNL_BATCH_GENID attribute in the batch begin message, then no check is performed. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1d82dd5e9a08..1b49209dd5c7 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -28,6 +28,7 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ int (*commit)(struct net *net, struct sk_buff *skb); int (*abort)(struct net *net, struct sk_buff *skb); + bool (*valid_genid)(struct net *net, u32 genid); }; int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); -- cgit v1.2.3 From 7f677633379b4abb3281cdbe7e7006f049305c03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Feb 2017 20:28:24 -0800 Subject: bpf: introduce BPF_F_ALLOW_OVERRIDE flag If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command to the given cgroup the descendent cgroup will be able to override effective bpf program that was inherited from this cgroup. By default it's not passed, therefore override is disallowed. Examples: 1. prog X attached to /A with default prog Y fails to attach to /A/B and /A/B/C Everything under /A runs prog X 2. prog X attached to /A with allow_override. prog Y fails to attach to /A/B with default (non-override) prog M attached to /A/B with allow_override. Everything under /A/B runs prog M only. 3. prog X attached to /A with allow_override. prog Y fails to attach to /A with default. The user has to detach first to switch the mode. In the future this behavior may be extended with a chain of non-overridable programs. Also fix the bug where detach from cgroup where nothing is attached was not throwing error. Return ENOENT in such case. Add several testcases and adjust libbpf. Fixes: 3007098494be ("cgroup: add support for eBPF programs") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Tejun Heo Acked-by: Daniel Mack Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 92bc89ae7e20..c970a25d2a49 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -21,20 +21,19 @@ struct cgroup_bpf { */ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; + bool disallow_override[MAX_BPF_ATTACH_TYPE]; }; void cgroup_bpf_put(struct cgroup *cgrp); void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type); +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool overridable); /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type); +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From d6cffbbe9a7e51eb705182965a189457c17ba8a3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 10 Feb 2017 10:35:02 +0300 Subject: proc/sysctl: prune stale dentries during unregistering Currently unregistering sysctl table does not prune its dentries. Stale dentries could slowdown sysctl operations significantly. For example, command: # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done creates a millions of stale denties around sysctls of loopback interface: # sysctl fs.dentry-state fs.dentry-state = 25812579 24724135 45 0 0 0 All of them have matching names thus lookup have to scan though whole hash chain and call d_compare (proc_sys_compare) which checks them under system-wide spinlock (sysctl_lock). # time sysctl -a > /dev/null real 1m12.806s user 0m0.016s sys 1m12.400s Currently only memory reclaimer could remove this garbage. But without significant memory pressure this never happens. This patch collects sysctl inodes into list on sysctl table header and prunes all their dentries once that table unregisters. Konstantin Khlebnikov writes: > On 10.02.2017 10:47, Al Viro wrote: >> how about >> the matching stats *after* that patch? > > dcache size doesn't grow endlessly, so stats are fine > > # sysctl fs.dentry-state > fs.dentry-state = 92712 58376 45 0 0 0 > > # time sysctl -a &>/dev/null > > real 0m0.013s > user 0m0.004s > sys 0m0.008s Signed-off-by: Konstantin Khlebnikov Suggested-by: Al Viro Signed-off-by: Eric W. Biederman --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index adf4e51cf597..b7e82049fec7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -143,6 +143,7 @@ struct ctl_table_header struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; + struct list_head inodes; /* head for proc_inode->sysctl_inodes */ }; struct ctl_dir { -- cgit v1.2.3 From 34a23327697d558a582e356573d7d36773996fbb Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 5 Dec 2016 13:15:28 +0100 Subject: mfd: axp20x: Correct a typo in axp20x_device_remove documentation The documentation of axp20x_device_remove() have a typo and use axp20x_device_probe() as name. This patch fix this typo. Signed-off-by: Corentin Labbe Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 812806d6319b..39f1da18c917 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -582,7 +582,7 @@ int axp20x_match_device(struct axp20x_dev *axp20x); int axp20x_device_probe(struct axp20x_dev *axp20x); /** - * axp20x_device_probe(): Remove a axp20x device + * axp20x_device_remove(): Remove a axp20x device * * @axp20x: axp20x device to remove * -- cgit v1.2.3 From 0a5454c901aea0fef99f5ef7910c69c501817ae1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Dec 2016 14:52:05 +0100 Subject: mfd: axp20x: Use IRQF_TRIGGER_LOW on the axp288 The interrupt line of the entire family of axp2xx pmics is active-low, for devicetree enumerated irqs, this is dealt with in the devicetree. ACPI irq resources have a flag field for this too, I tried using this on my CUBE iwork8 Air tablet, but it does not contain the right data. The dstd shows the irq listed as either ActiveLow or ActiveHigh, depending on the OSID variable, which seems to be set by the "OS IMAGE ID" in the BIOS/EFI setup screen. Since the acpi-resource info is no good, simply pass in IRQF_TRIGGER_LOW on the axp288. Together with the other axp288 fixes in this series, this fixes the axp288 irq contineously triggering. Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 39f1da18c917..6d5dd3f96d33 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -523,6 +523,7 @@ enum axp809_irqs { struct axp20x_dev { struct device *dev; int irq; + unsigned long irq_flags; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; long variant; -- cgit v1.2.3 From 59f10f7e62a28b9b664a9b2efbc5bd10d8d751f0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Dec 2016 14:52:08 +0100 Subject: mfd: axp20x: Drop wrong AXP288_PMIC_ADC_EN define The adc-enable register for the axp288 is 0x82, not 0x84. 0x82 is already defined as AXP20X_ADC_EN1 and that is what the axp288_adc driver is actually using, so simply drop the wrong AXP288_PMIC_ADC_EN define. Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 6d5dd3f96d33..35418ccb3bb7 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -238,7 +238,6 @@ enum { #define AXP288_PMIC_ADC_H 0x56 #define AXP288_PMIC_ADC_L 0x57 #define AXP288_ADC_TS_PIN_CTRL 0x84 -#define AXP288_PMIC_ADC_EN 0x84 /* Fuel Gauge */ #define AXP288_FG_RDC1_REG 0xba -- cgit v1.2.3 From 178e8351cef3607821ec9a4e2fe3f414145fdf34 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Dec 2016 21:09:05 +0100 Subject: mfd: axp20x: Add a few missing defines for AXP288 specific registers Add defines for the AXP288_POWER_REASON and AXP288_RT_BATT_V_H and AXP288_RT_BATT_V_L and AXP288_BC_* registers. While at it also move the AXP288_TS_ADC_H-AXP288_GP_ADC_L defines, which for some reason where in a different place, together with the rest of the AXP288 specific defines. Signed-off-by: Hans de Goede Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 35418ccb3bb7..0aa4ef7157b8 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -235,9 +235,20 @@ enum { #define AXP22X_BATLOW_THRES1 0xe6 /* AXP288 specific registers */ +#define AXP288_POWER_REASON 0x02 +#define AXP288_BC_GLOBAL 0x2c +#define AXP288_BC_VBUS_CNTL 0x2d +#define AXP288_BC_USB_STAT 0x2e +#define AXP288_BC_DET_STAT 0x2f #define AXP288_PMIC_ADC_H 0x56 #define AXP288_PMIC_ADC_L 0x57 +#define AXP288_TS_ADC_H 0x58 +#define AXP288_TS_ADC_L 0x59 +#define AXP288_GP_ADC_H 0x5a +#define AXP288_GP_ADC_L 0x5b #define AXP288_ADC_TS_PIN_CTRL 0x84 +#define AXP288_RT_BATT_V_H 0xa0 +#define AXP288_RT_BATT_V_L 0xa1 /* Fuel Gauge */ #define AXP288_FG_RDC1_REG 0xba @@ -514,11 +525,6 @@ enum axp809_irqs { AXP809_IRQ_GPIO0_INPUT, }; -#define AXP288_TS_ADC_H 0x58 -#define AXP288_TS_ADC_L 0x59 -#define AXP288_GP_ADC_H 0x5a -#define AXP288_GP_ADC_L 0x5b - struct axp20x_dev { struct device *dev; int irq; -- cgit v1.2.3 From a042a7a4e1ce31380d1370a4d45a3f48d0972655 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Dec 2016 21:55:47 +0000 Subject: mfd: abx500: Fix spelling mistake: "Celcius" -> "Celsius" Trivial fix to spelling mistake in MFD headers. Signed-off-by: Colin Ian King Signed-off-by: Lee Jones --- include/linux/mfd/abx500.h | 2 +- include/linux/mfd/abx500/ab8500-bm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h index 552cc1d61cc7..44412c9d26e1 100644 --- a/include/linux/mfd/abx500.h +++ b/include/linux/mfd/abx500.h @@ -45,7 +45,7 @@ enum abx500_adc_therm { * struct abx500_res_to_temp - defines one point in a temp to res curve. To * be used in battery packs that combines the identification resistor with a * NTC resistor. - * @temp: battery pack temperature in Celcius + * @temp: battery pack temperature in Celsius * @resist: NTC resistor net total resistance */ struct abx500_res_to_temp { diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h index 12a5b396921e..e63681eb6c62 100644 --- a/include/linux/mfd/abx500/ab8500-bm.h +++ b/include/linux/mfd/abx500/ab8500-bm.h @@ -279,7 +279,7 @@ enum bup_vch_sel { * struct res_to_temp - defines one point in a temp to res curve. To * be used in battery packs that combines the identification resistor with a * NTC resistor. - * @temp: battery pack temperature in Celcius + * @temp: battery pack temperature in Celsius * @resist: NTC resistor net total resistance */ struct res_to_temp { @@ -290,7 +290,7 @@ struct res_to_temp { /** * struct batres_vs_temp - defines one point in a temp vs battery internal * resistance curve. - * @temp: battery pack temperature in Celcius + * @temp: battery pack temperature in Celsius * @resist: battery internal reistance in mOhm */ struct batres_vs_temp { -- cgit v1.2.3 From a9eb186e13144782232cc6fa731441be54baf505 Mon Sep 17 00:00:00 2001 From: Joseph Lo Date: Fri, 16 Dec 2016 18:57:36 +0100 Subject: mfd: cros_ec: Prevent data transfer while device is suspended The cros_ec driver is still active while the device is suspended. Besides that, it also tries to transfer data even after the I2C host had been suspended. This patch uses a simple flag to prevent this. Signed-off-by: Joseph Lo Signed-off-by: Thierry Escande Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index f62043a75f43..7a01c94496f1 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -103,6 +103,7 @@ struct cros_ec_command { * @din_size: size of din buffer to allocate (zero to use static din) * @dout_size: size of dout buffer to allocate (zero to use static dout) * @wake_enabled: true if this device can wake the system from sleep + * @suspended: true if this device had been suspended * @cmd_xfer: send command to EC and get response * Returns the number of bytes received if the communication succeeded, but * that doesn't mean the EC was happy with the command. The caller @@ -136,6 +137,7 @@ struct cros_ec_device { int din_size; int dout_size; bool wake_enabled; + bool suspended; int (*cmd_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); int (*pkt_xfer)(struct cros_ec_device *ec, -- cgit v1.2.3 From f00c06fd98576face871e62bb3aa045c5f647661 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Fri, 16 Dec 2016 18:57:37 +0100 Subject: mfd: cros_ec: Send suspend state notification to EC Notify EC when going to or returning from suspend so that proper actions related to wake events can be taken. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec_commands.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index da1c188562bc..34ecae4d612c 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2547,6 +2547,20 @@ struct ec_params_ext_power_current_limit { uint32_t limit; /* in mA */ } __packed; +/* Inform the EC when entering a sleep state */ +#define EC_CMD_HOST_SLEEP_EVENT 0xa9 + +enum host_sleep_event { + HOST_SLEEP_EVENT_S3_SUSPEND = 1, + HOST_SLEEP_EVENT_S3_RESUME = 2, + HOST_SLEEP_EVENT_S0IX_SUSPEND = 3, + HOST_SLEEP_EVENT_S0IX_RESUME = 4 +}; + +struct ec_params_host_sleep_event { + uint8_t sleep_event; +} __packed; + /*****************************************************************************/ /* Smart battery pass-through */ -- cgit v1.2.3 From 56e1d40d3beab2f247d48574bf51fc5daeebc285 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 5 Jan 2017 16:44:39 -0800 Subject: mfd: cpcap: Add minimal support Many Motorola phones like droid 4 are using a custom PMIC called CPCAP or 6556002. We can support it's core features quite easily with regmap_spi and regmap_irq. The children of cpcap, such as regulators, ADC and USB, can be just regular device drivers and defined in the dts file. They get probed as we call of_platform_populate() at the end of our probe, and then the children can just call dev_get_regmap(dev.parent, NULL) to get the regmap. Cc: devicetree@vger.kernel.org Cc: Marcel Partap Cc: Mark Rutland Cc: Michael Scott Acked-by: Rob Herring Signed-off-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/mfd/motorola-cpcap.h | 292 +++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 include/linux/mfd/motorola-cpcap.h (limited to 'include/linux') diff --git a/include/linux/mfd/motorola-cpcap.h b/include/linux/mfd/motorola-cpcap.h new file mode 100644 index 000000000000..b4031c2b2214 --- /dev/null +++ b/include/linux/mfd/motorola-cpcap.h @@ -0,0 +1,292 @@ +/* + * The register defines are based on earlier cpcap.h in Motorola Linux kernel + * tree. + * + * Copyright (C) 2007-2009 Motorola, Inc. + * + * Rewritten for the real register offsets instead of enumeration + * to make the defines usable with Linux kernel regmap support + * + * Copyright (C) 2016 Tony Lindgren + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define CPCAP_VENDOR_ST 0 +#define CPCAP_VENDOR_TI 1 + +#define CPCAP_REVISION_MAJOR(r) (((r) >> 4) + 1) +#define CPCAP_REVISION_MINOR(r) ((r) & 0xf) + +#define CPCAP_REVISION_1_0 0x08 +#define CPCAP_REVISION_1_1 0x09 +#define CPCAP_REVISION_2_0 0x10 +#define CPCAP_REVISION_2_1 0x11 + +/* CPCAP registers */ +#define CPCAP_REG_INT1 0x0000 /* Interrupt 1 */ +#define CPCAP_REG_INT2 0x0004 /* Interrupt 2 */ +#define CPCAP_REG_INT3 0x0008 /* Interrupt 3 */ +#define CPCAP_REG_INT4 0x000c /* Interrupt 4 */ +#define CPCAP_REG_INTM1 0x0010 /* Interrupt Mask 1 */ +#define CPCAP_REG_INTM2 0x0014 /* Interrupt Mask 2 */ +#define CPCAP_REG_INTM3 0x0018 /* Interrupt Mask 3 */ +#define CPCAP_REG_INTM4 0x001c /* Interrupt Mask 4 */ +#define CPCAP_REG_INTS1 0x0020 /* Interrupt Sense 1 */ +#define CPCAP_REG_INTS2 0x0024 /* Interrupt Sense 2 */ +#define CPCAP_REG_INTS3 0x0028 /* Interrupt Sense 3 */ +#define CPCAP_REG_INTS4 0x002c /* Interrupt Sense 4 */ +#define CPCAP_REG_ASSIGN1 0x0030 /* Resource Assignment 1 */ +#define CPCAP_REG_ASSIGN2 0x0034 /* Resource Assignment 2 */ +#define CPCAP_REG_ASSIGN3 0x0038 /* Resource Assignment 3 */ +#define CPCAP_REG_ASSIGN4 0x003c /* Resource Assignment 4 */ +#define CPCAP_REG_ASSIGN5 0x0040 /* Resource Assignment 5 */ +#define CPCAP_REG_ASSIGN6 0x0044 /* Resource Assignment 6 */ +#define CPCAP_REG_VERSC1 0x0048 /* Version Control 1 */ +#define CPCAP_REG_VERSC2 0x004c /* Version Control 2 */ + +#define CPCAP_REG_MI1 0x0200 /* Macro Interrupt 1 */ +#define CPCAP_REG_MIM1 0x0204 /* Macro Interrupt Mask 1 */ +#define CPCAP_REG_MI2 0x0208 /* Macro Interrupt 2 */ +#define CPCAP_REG_MIM2 0x020c /* Macro Interrupt Mask 2 */ +#define CPCAP_REG_UCC1 0x0210 /* UC Control 1 */ +#define CPCAP_REG_UCC2 0x0214 /* UC Control 2 */ + +#define CPCAP_REG_PC1 0x021c /* Power Cut 1 */ +#define CPCAP_REG_PC2 0x0220 /* Power Cut 2 */ +#define CPCAP_REG_BPEOL 0x0224 /* BP and EOL */ +#define CPCAP_REG_PGC 0x0228 /* Power Gate and Control */ +#define CPCAP_REG_MT1 0x022c /* Memory Transfer 1 */ +#define CPCAP_REG_MT2 0x0230 /* Memory Transfer 2 */ +#define CPCAP_REG_MT3 0x0234 /* Memory Transfer 3 */ +#define CPCAP_REG_PF 0x0238 /* Print Format */ + +#define CPCAP_REG_SCC 0x0400 /* System Clock Control */ +#define CPCAP_REG_SW1 0x0404 /* Stop Watch 1 */ +#define CPCAP_REG_SW2 0x0408 /* Stop Watch 2 */ +#define CPCAP_REG_UCTM 0x040c /* UC Turbo Mode */ +#define CPCAP_REG_TOD1 0x0410 /* Time of Day 1 */ +#define CPCAP_REG_TOD2 0x0414 /* Time of Day 2 */ +#define CPCAP_REG_TODA1 0x0418 /* Time of Day Alarm 1 */ +#define CPCAP_REG_TODA2 0x041c /* Time of Day Alarm 2 */ +#define CPCAP_REG_DAY 0x0420 /* Day */ +#define CPCAP_REG_DAYA 0x0424 /* Day Alarm */ +#define CPCAP_REG_VAL1 0x0428 /* Validity 1 */ +#define CPCAP_REG_VAL2 0x042c /* Validity 2 */ + +#define CPCAP_REG_SDVSPLL 0x0600 /* Switcher DVS and PLL */ +#define CPCAP_REG_SI2CC1 0x0604 /* Switcher I2C Control 1 */ +#define CPCAP_REG_Si2CC2 0x0608 /* Switcher I2C Control 2 */ +#define CPCAP_REG_S1C1 0x060c /* Switcher 1 Control 1 */ +#define CPCAP_REG_S1C2 0x0610 /* Switcher 1 Control 2 */ +#define CPCAP_REG_S2C1 0x0614 /* Switcher 2 Control 1 */ +#define CPCAP_REG_S2C2 0x0618 /* Switcher 2 Control 2 */ +#define CPCAP_REG_S3C 0x061c /* Switcher 3 Control */ +#define CPCAP_REG_S4C1 0x0620 /* Switcher 4 Control 1 */ +#define CPCAP_REG_S4C2 0x0624 /* Switcher 4 Control 2 */ +#define CPCAP_REG_S5C 0x0628 /* Switcher 5 Control */ +#define CPCAP_REG_S6C 0x062c /* Switcher 6 Control */ +#define CPCAP_REG_VCAMC 0x0630 /* VCAM Control */ +#define CPCAP_REG_VCSIC 0x0634 /* VCSI Control */ +#define CPCAP_REG_VDACC 0x0638 /* VDAC Control */ +#define CPCAP_REG_VDIGC 0x063c /* VDIG Control */ +#define CPCAP_REG_VFUSEC 0x0640 /* VFUSE Control */ +#define CPCAP_REG_VHVIOC 0x0644 /* VHVIO Control */ +#define CPCAP_REG_VSDIOC 0x0648 /* VSDIO Control */ +#define CPCAP_REG_VPLLC 0x064c /* VPLL Control */ +#define CPCAP_REG_VRF1C 0x0650 /* VRF1 Control */ +#define CPCAP_REG_VRF2C 0x0654 /* VRF2 Control */ +#define CPCAP_REG_VRFREFC 0x0658 /* VRFREF Control */ +#define CPCAP_REG_VWLAN1C 0x065c /* VWLAN1 Control */ +#define CPCAP_REG_VWLAN2C 0x0660 /* VWLAN2 Control */ +#define CPCAP_REG_VSIMC 0x0664 /* VSIM Control */ +#define CPCAP_REG_VVIBC 0x0668 /* VVIB Control */ +#define CPCAP_REG_VUSBC 0x066c /* VUSB Control */ +#define CPCAP_REG_VUSBINT1C 0x0670 /* VUSBINT1 Control */ +#define CPCAP_REG_VUSBINT2C 0x0674 /* VUSBINT2 Control */ +#define CPCAP_REG_URT 0x0678 /* Useroff Regulator Trigger */ +#define CPCAP_REG_URM1 0x067c /* Useroff Regulator Mask 1 */ +#define CPCAP_REG_URM2 0x0680 /* Useroff Regulator Mask 2 */ + +#define CPCAP_REG_VAUDIOC 0x0800 /* VAUDIO Control */ +#define CPCAP_REG_CC 0x0804 /* Codec Control */ +#define CPCAP_REG_CDI 0x0808 /* Codec Digital Interface */ +#define CPCAP_REG_SDAC 0x080c /* Stereo DAC */ +#define CPCAP_REG_SDACDI 0x0810 /* Stereo DAC Digital Interface */ +#define CPCAP_REG_TXI 0x0814 /* TX Inputs */ +#define CPCAP_REG_TXMP 0x0818 /* TX MIC PGA's */ +#define CPCAP_REG_RXOA 0x081c /* RX Output Amplifiers */ +#define CPCAP_REG_RXVC 0x0820 /* RX Volume Control */ +#define CPCAP_REG_RXCOA 0x0824 /* RX Codec to Output Amps */ +#define CPCAP_REG_RXSDOA 0x0828 /* RX Stereo DAC to Output Amps */ +#define CPCAP_REG_RXEPOA 0x082c /* RX External PGA to Output Amps */ +#define CPCAP_REG_RXLL 0x0830 /* RX Low Latency */ +#define CPCAP_REG_A2LA 0x0834 /* A2 Loudspeaker Amplifier */ +#define CPCAP_REG_MIPIS1 0x0838 /* MIPI Slimbus 1 */ +#define CPCAP_REG_MIPIS2 0x083c /* MIPI Slimbus 2 */ +#define CPCAP_REG_MIPIS3 0x0840 /* MIPI Slimbus 3. */ +#define CPCAP_REG_LVAB 0x0844 /* LMR Volume and A4 Balanced. */ + +#define CPCAP_REG_CCC1 0x0a00 /* Coulomb Counter Control 1 */ +#define CPCAP_REG_CRM 0x0a04 /* Charger and Reverse Mode */ +#define CPCAP_REG_CCCC2 0x0a08 /* Coincell and Coulomb Ctr Ctrl 2 */ +#define CPCAP_REG_CCS1 0x0a0c /* Coulomb Counter Sample 1 */ +#define CPCAP_REG_CCS2 0x0a10 /* Coulomb Counter Sample 2 */ +#define CPCAP_REG_CCA1 0x0a14 /* Coulomb Counter Accumulator 1 */ +#define CPCAP_REG_CCA2 0x0a18 /* Coulomb Counter Accumulator 2 */ +#define CPCAP_REG_CCM 0x0a1c /* Coulomb Counter Mode */ +#define CPCAP_REG_CCO 0x0a20 /* Coulomb Counter Offset */ +#define CPCAP_REG_CCI 0x0a24 /* Coulomb Counter Integrator */ + +#define CPCAP_REG_ADCC1 0x0c00 /* A/D Converter Configuration 1 */ +#define CPCAP_REG_ADCC2 0x0c04 /* A/D Converter Configuration 2 */ +#define CPCAP_REG_ADCD0 0x0c08 /* A/D Converter Data 0 */ +#define CPCAP_REG_ADCD1 0x0c0c /* A/D Converter Data 1 */ +#define CPCAP_REG_ADCD2 0x0c10 /* A/D Converter Data 2 */ +#define CPCAP_REG_ADCD3 0x0c14 /* A/D Converter Data 3 */ +#define CPCAP_REG_ADCD4 0x0c18 /* A/D Converter Data 4 */ +#define CPCAP_REG_ADCD5 0x0c1c /* A/D Converter Data 5 */ +#define CPCAP_REG_ADCD6 0x0c20 /* A/D Converter Data 6 */ +#define CPCAP_REG_ADCD7 0x0c24 /* A/D Converter Data 7 */ +#define CPCAP_REG_ADCAL1 0x0c28 /* A/D Converter Calibration 1 */ +#define CPCAP_REG_ADCAL2 0x0c2c /* A/D Converter Calibration 2 */ + +#define CPCAP_REG_USBC1 0x0e00 /* USB Control 1 */ +#define CPCAP_REG_USBC2 0x0e04 /* USB Control 2 */ +#define CPCAP_REG_USBC3 0x0e08 /* USB Control 3 */ +#define CPCAP_REG_UVIDL 0x0e0c /* ULPI Vendor ID Low */ +#define CPCAP_REG_UVIDH 0x0e10 /* ULPI Vendor ID High */ +#define CPCAP_REG_UPIDL 0x0e14 /* ULPI Product ID Low */ +#define CPCAP_REG_UPIDH 0x0e18 /* ULPI Product ID High */ +#define CPCAP_REG_UFC1 0x0e1c /* ULPI Function Control 1 */ +#define CPCAP_REG_UFC2 0x0e20 /* ULPI Function Control 2 */ +#define CPCAP_REG_UFC3 0x0e24 /* ULPI Function Control 3 */ +#define CPCAP_REG_UIC1 0x0e28 /* ULPI Interface Control 1 */ +#define CPCAP_REG_UIC2 0x0e2c /* ULPI Interface Control 2 */ +#define CPCAP_REG_UIC3 0x0e30 /* ULPI Interface Control 3 */ +#define CPCAP_REG_USBOTG1 0x0e34 /* USB OTG Control 1 */ +#define CPCAP_REG_USBOTG2 0x0e38 /* USB OTG Control 2 */ +#define CPCAP_REG_USBOTG3 0x0e3c /* USB OTG Control 3 */ +#define CPCAP_REG_UIER1 0x0e40 /* USB Interrupt Enable Rising 1 */ +#define CPCAP_REG_UIER2 0x0e44 /* USB Interrupt Enable Rising 2 */ +#define CPCAP_REG_UIER3 0x0e48 /* USB Interrupt Enable Rising 3 */ +#define CPCAP_REG_UIEF1 0x0e4c /* USB Interrupt Enable Falling 1 */ +#define CPCAP_REG_UIEF2 0x0e50 /* USB Interrupt Enable Falling 1 */ +#define CPCAP_REG_UIEF3 0x0e54 /* USB Interrupt Enable Falling 1 */ +#define CPCAP_REG_UIS 0x0e58 /* USB Interrupt Status */ +#define CPCAP_REG_UIL 0x0e5c /* USB Interrupt Latch */ +#define CPCAP_REG_USBD 0x0e60 /* USB Debug */ +#define CPCAP_REG_SCR1 0x0e64 /* Scratch 1 */ +#define CPCAP_REG_SCR2 0x0e68 /* Scratch 2 */ +#define CPCAP_REG_SCR3 0x0e6c /* Scratch 3 */ + +#define CPCAP_REG_VMC 0x0eac /* Video Mux Control */ +#define CPCAP_REG_OWDC 0x0eb0 /* One Wire Device Control */ +#define CPCAP_REG_GPIO0 0x0eb4 /* GPIO 0 Control */ + +#define CPCAP_REG_GPIO1 0x0ebc /* GPIO 1 Control */ + +#define CPCAP_REG_GPIO2 0x0ec4 /* GPIO 2 Control */ + +#define CPCAP_REG_GPIO3 0x0ecc /* GPIO 3 Control */ + +#define CPCAP_REG_GPIO4 0x0ed4 /* GPIO 4 Control */ + +#define CPCAP_REG_GPIO5 0x0edc /* GPIO 5 Control */ + +#define CPCAP_REG_GPIO6 0x0ee4 /* GPIO 6 Control */ + +#define CPCAP_REG_MDLC 0x1000 /* Main Display Lighting Control */ +#define CPCAP_REG_KLC 0x1004 /* Keypad Lighting Control */ +#define CPCAP_REG_ADLC 0x1008 /* Aux Display Lighting Control */ +#define CPCAP_REG_REDC 0x100c /* Red Triode Control */ +#define CPCAP_REG_GREENC 0x1010 /* Green Triode Control */ +#define CPCAP_REG_BLUEC 0x1014 /* Blue Triode Control */ +#define CPCAP_REG_CFC 0x1018 /* Camera Flash Control */ +#define CPCAP_REG_ABC 0x101c /* Adaptive Boost Control */ +#define CPCAP_REG_BLEDC 0x1020 /* Bluetooth LED Control */ +#define CPCAP_REG_CLEDC 0x1024 /* Camera Privacy LED Control */ + +#define CPCAP_REG_OW1C 0x1200 /* One Wire 1 Command */ +#define CPCAP_REG_OW1D 0x1204 /* One Wire 1 Data */ +#define CPCAP_REG_OW1I 0x1208 /* One Wire 1 Interrupt */ +#define CPCAP_REG_OW1IE 0x120c /* One Wire 1 Interrupt Enable */ + +#define CPCAP_REG_OW1 0x1214 /* One Wire 1 Control */ + +#define CPCAP_REG_OW2C 0x1220 /* One Wire 2 Command */ +#define CPCAP_REG_OW2D 0x1224 /* One Wire 2 Data */ +#define CPCAP_REG_OW2I 0x1228 /* One Wire 2 Interrupt */ +#define CPCAP_REG_OW2IE 0x122c /* One Wire 2 Interrupt Enable */ + +#define CPCAP_REG_OW2 0x1234 /* One Wire 2 Control */ + +#define CPCAP_REG_OW3C 0x1240 /* One Wire 3 Command */ +#define CPCAP_REG_OW3D 0x1244 /* One Wire 3 Data */ +#define CPCAP_REG_OW3I 0x1248 /* One Wire 3 Interrupt */ +#define CPCAP_REG_OW3IE 0x124c /* One Wire 3 Interrupt Enable */ + +#define CPCAP_REG_OW3 0x1254 /* One Wire 3 Control */ +#define CPCAP_REG_GCAIC 0x1258 /* GCAI Clock Control */ +#define CPCAP_REG_GCAIM 0x125c /* GCAI GPIO Mode */ +#define CPCAP_REG_LGDIR 0x1260 /* LMR GCAI GPIO Direction */ +#define CPCAP_REG_LGPU 0x1264 /* LMR GCAI GPIO Pull-up */ +#define CPCAP_REG_LGPIN 0x1268 /* LMR GCAI GPIO Pin */ +#define CPCAP_REG_LGMASK 0x126c /* LMR GCAI GPIO Mask */ +#define CPCAP_REG_LDEB 0x1270 /* LMR Debounce Settings */ +#define CPCAP_REG_LGDET 0x1274 /* LMR GCAI Detach Detect */ +#define CPCAP_REG_LMISC 0x1278 /* LMR Misc Bits */ +#define CPCAP_REG_LMACE 0x127c /* LMR Mace IC Support */ + +#define CPCAP_REG_TEST 0x7c00 /* Test */ + +#define CPCAP_REG_ST_TEST1 0x7d08 /* ST Test1 */ + +#define CPCAP_REG_ST_TEST2 0x7d18 /* ST Test2 */ + +/* + * Helpers for child devices to check the revision and vendor. + * + * REVISIT: No documentation for the bits below, please update + * to use proper names for defines when available. + */ + +static inline int cpcap_get_revision(struct device *dev, + struct regmap *regmap, + u16 *revision) +{ + unsigned int val; + int ret; + + ret = regmap_read(regmap, CPCAP_REG_VERSC1, &val); + if (ret) { + dev_err(dev, "Could not read revision\n"); + + return ret; + } + + *revision = ((val >> 3) & 0x7) | ((val << 3) & 0x38); + + return 0; +} + +static inline int cpcap_get_vendor(struct device *dev, + struct regmap *regmap, + u16 *vendor) +{ + unsigned int val; + int ret; + + ret = regmap_read(regmap, CPCAP_REG_VERSC1, &val); + if (ret) { + dev_err(dev, "Could not read vendor\n"); + + return ret; + } + + *vendor = (val >> 6) & 0x7; + + return 0; +} -- cgit v1.2.3 From 46bc5c408e4e325efbfff26609c76099979180a7 Mon Sep 17 00:00:00 2001 From: Jungseung Lee Date: Thu, 22 Dec 2016 12:37:34 +0900 Subject: mmc: core: Export device lifetime information through sysfs In the eMMC 5.0 version of the spec, several EXT_CSD fields about device lifetime are added. - Two types of estimated indications reflected by averaged wear out of memory - An indication reflected by average reserved blocks Export the information through sysfs. Signed-off-by: Jungseung Lee Reviewed-by: Jaehoon Chung Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 3 +++ include/linux/mmc/mmc.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 95d69d498296..00449e51838a 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -121,6 +121,9 @@ struct mmc_ext_csd { u8 raw_pwr_cl_ddr_200_360; /* 253 */ u8 raw_bkops_status; /* 246 */ u8 raw_sectors[4]; /* 212 - 4 bytes */ + u8 pre_eol_info; /* 267 */ + u8 device_life_time_est_typ_a; /* 268 */ + u8 device_life_time_est_typ_b; /* 269 */ unsigned int feature_support; #define MMC_DISCARD_FEATURE BIT(0) /* CMD38 feature */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 672730acc705..a0740829ff00 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -339,6 +339,9 @@ struct _mmc_csd { #define EXT_CSD_CACHE_SIZE 249 /* RO, 4 bytes */ #define EXT_CSD_PWR_CL_DDR_200_360 253 /* RO */ #define EXT_CSD_FIRMWARE_VERSION 254 /* RO, 8 bytes */ +#define EXT_CSD_PRE_EOL_INFO 267 /* RO */ +#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A 268 /* RO */ +#define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B 269 /* RO */ #define EXT_CSD_CMDQ_DEPTH 307 /* RO */ #define EXT_CSD_CMDQ_SUPPORT 308 /* RO */ #define EXT_CSD_SUPPORTED_MODE 493 /* RO */ -- cgit v1.2.3 From 13f00f9f2460cfa106de6c30512b648d85831468 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:15 +0100 Subject: mmc: Removed the unused public mmc boot.h header Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/boot.h | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 include/linux/mmc/boot.h (limited to 'include/linux') diff --git a/include/linux/mmc/boot.h b/include/linux/mmc/boot.h deleted file mode 100644 index 23acc3baa07d..000000000000 --- a/include/linux/mmc/boot.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef LINUX_MMC_BOOT_H -#define LINUX_MMC_BOOT_H - -enum { MMC_PROGRESS_ENTER, MMC_PROGRESS_INIT, - MMC_PROGRESS_LOAD, MMC_PROGRESS_DONE }; - -#endif /* LINUX_MMC_BOOT_H */ -- cgit v1.2.3 From 95cc4df716a210a19f0611215c49484d460250fd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:16 +0100 Subject: mmc: sh_mmcif: Remove unused use_cd_gpio/cd_gpio from platform data Cc: Kuninori Morimoto Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/sh_mmcif.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index ccd8fb2cad52..5ce5a2c1a1f5 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -35,10 +35,8 @@ struct sh_mmcif_plat_data { int (*get_cd)(struct platform_device *pdef); unsigned int slave_id_tx; /* embedded slave_id_[tr]x */ unsigned int slave_id_rx; - bool use_cd_gpio : 1; bool ccs_unsupported : 1; bool clk_ctrl2_present : 1; - unsigned int cd_gpio; u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ unsigned long caps; u32 ocr; -- cgit v1.2.3 From 5957eeba530747e9d77daf2f300a186758be51d9 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:17 +0100 Subject: mmc: sh_mmcif: Remove unused ->get_cd() platform callback Removing the callback also enables us to remove the sh_mmcif_get_cd() altogether, as we convert to use mmc_gpio_get_cd() to the same kind of work. Cc: Kuninori Morimoto Signed-off-by: Ulf Hansson --- include/linux/mmc/sh_mmcif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 5ce5a2c1a1f5..7cafc95a2b4e 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -32,7 +32,6 @@ */ struct sh_mmcif_plat_data { - int (*get_cd)(struct platform_device *pdef); unsigned int slave_id_tx; /* embedded slave_id_[tr]x */ unsigned int slave_id_rx; bool ccs_unsupported : 1; -- cgit v1.2.3 From 8020f71117042ed82287e4f51c48b57ce4c783df Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:18 +0100 Subject: mmc: sh_mmcif: Remove unused ccs_unsupported from the platform data There are currently no users of the ccs_unsupported member from the platform data, so let's remove it. Note, as some of the sh_mmcif variants may not support ccs, let's keep the current code in the driver, which deals with this. For future support, we should invent a DT binding instead, but let's leave that until it's needed. Cc: Kuninori Morimoto Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/sh_mmcif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 7cafc95a2b4e..384b86b90473 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -34,7 +34,6 @@ struct sh_mmcif_plat_data { unsigned int slave_id_tx; /* embedded slave_id_[tr]x */ unsigned int slave_id_rx; - bool ccs_unsupported : 1; bool clk_ctrl2_present : 1; u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ unsigned long caps; -- cgit v1.2.3 From dba4bb484e9e495478f2bcf474393d33f7e0ec27 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:19 +0100 Subject: mmc: sh_mmcif: Remove unused clk_ctrl2_present from the platform data There are currently no users of the clk_ctrl2_present member from the platform data, so let's remove it. Note, as some of the sh_mmcif variants may support clk_ctrl2, let's keep the current code in the driver, which deals with this. For future support, we should invent a DT binding instead, but let's leave that until it's needed. Cc: Kuninori Morimoto Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/sh_mmcif.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index 384b86b90473..a7baa29484c3 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -34,7 +34,6 @@ struct sh_mmcif_plat_data { unsigned int slave_id_tx; /* embedded slave_id_[tr]x */ unsigned int slave_id_rx; - bool clk_ctrl2_present : 1; u8 sup_pclk; /* 1 :SH7757, 0: SH7724/SH7372 */ unsigned long caps; u32 ocr; -- cgit v1.2.3 From 0f21c58c63add705065a75495e7a1860a62470ed Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 30 Dec 2016 13:47:20 +0100 Subject: mmc: dw_mmc: Remove the public dw_mmc header file There are currently no external users of the public dw_mmc header file, except the dw_mmc driver itself. Therefore let's move the definitions from the public dw_mmc header file into the existing private dw_mmc header file and then remove the public one. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/dw_mmc.h | 274 --------------------------------------------- 1 file changed, 274 deletions(-) delete mode 100644 include/linux/mmc/dw_mmc.h (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h deleted file mode 100644 index 15db6f83f53f..000000000000 --- a/include/linux/mmc/dw_mmc.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Synopsys DesignWare Multimedia Card Interface driver - * (Based on NXP driver for lpc 31xx) - * - * Copyright (C) 2009 NXP Semiconductors - * Copyright (C) 2009, 2010 Imagination Technologies Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef LINUX_MMC_DW_MMC_H -#define LINUX_MMC_DW_MMC_H - -#include -#include -#include -#include - -#define MAX_MCI_SLOTS 2 - -enum dw_mci_state { - STATE_IDLE = 0, - STATE_SENDING_CMD, - STATE_SENDING_DATA, - STATE_DATA_BUSY, - STATE_SENDING_STOP, - STATE_DATA_ERROR, - STATE_SENDING_CMD11, - STATE_WAITING_CMD11_DONE, -}; - -enum { - EVENT_CMD_COMPLETE = 0, - EVENT_XFER_COMPLETE, - EVENT_DATA_COMPLETE, - EVENT_DATA_ERROR, -}; - -enum dw_mci_cookie { - COOKIE_UNMAPPED, - COOKIE_PRE_MAPPED, /* mapped by pre_req() of dwmmc */ - COOKIE_MAPPED, /* mapped by prepare_data() of dwmmc */ -}; - -struct mmc_data; - -enum { - TRANS_MODE_PIO = 0, - TRANS_MODE_IDMAC, - TRANS_MODE_EDMAC -}; - -struct dw_mci_dma_slave { - struct dma_chan *ch; - enum dma_transfer_direction direction; -}; - -/** - * struct dw_mci - MMC controller state shared between all slots - * @lock: Spinlock protecting the queue and associated data. - * @irq_lock: Spinlock protecting the INTMASK setting. - * @regs: Pointer to MMIO registers. - * @fifo_reg: Pointer to MMIO registers for data FIFO - * @sg: Scatterlist entry currently being processed by PIO code, if any. - * @sg_miter: PIO mapping scatterlist iterator. - * @cur_slot: The slot which is currently using the controller. - * @mrq: The request currently being processed on @cur_slot, - * or NULL if the controller is idle. - * @cmd: The command currently being sent to the card, or NULL. - * @data: The data currently being transferred, or NULL if no data - * transfer is in progress. - * @stop_abort: The command currently prepared for stoping transfer. - * @prev_blksz: The former transfer blksz record. - * @timing: Record of current ios timing. - * @use_dma: Whether DMA channel is initialized or not. - * @using_dma: Whether DMA is in use for the current transfer. - * @dma_64bit_address: Whether DMA supports 64-bit address mode or not. - * @sg_dma: Bus address of DMA buffer. - * @sg_cpu: Virtual address of DMA buffer. - * @dma_ops: Pointer to platform-specific DMA callbacks. - * @cmd_status: Snapshot of SR taken upon completion of the current - * @ring_size: Buffer size for idma descriptors. - * command. Only valid when EVENT_CMD_COMPLETE is pending. - * @dms: structure of slave-dma private data. - * @phy_regs: physical address of controller's register map - * @data_status: Snapshot of SR taken upon completion of the current - * data transfer. Only valid when EVENT_DATA_COMPLETE or - * EVENT_DATA_ERROR is pending. - * @stop_cmdr: Value to be loaded into CMDR when the stop command is - * to be sent. - * @dir_status: Direction of current transfer. - * @tasklet: Tasklet running the request state machine. - * @pending_events: Bitmask of events flagged by the interrupt handler - * to be processed by the tasklet. - * @completed_events: Bitmask of events which the state machine has - * processed. - * @state: Tasklet state. - * @queue: List of slots waiting for access to the controller. - * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus - * rate and timeout calculations. - * @current_speed: Configured rate of the controller. - * @num_slots: Number of slots available. - * @fifoth_val: The value of FIFOTH register. - * @verid: Denote Version ID. - * @dev: Device associated with the MMC controller. - * @pdata: Platform data associated with the MMC controller. - * @drv_data: Driver specific data for identified variant of the controller - * @priv: Implementation defined private data. - * @biu_clk: Pointer to bus interface unit clock instance. - * @ciu_clk: Pointer to card interface unit clock instance. - * @slot: Slots sharing this MMC controller. - * @fifo_depth: depth of FIFO. - * @data_shift: log2 of FIFO item size. - * @part_buf_start: Start index in part_buf. - * @part_buf_count: Bytes of partial data in part_buf. - * @part_buf: Simple buffer for partial fifo reads/writes. - * @push_data: Pointer to FIFO push function. - * @pull_data: Pointer to FIFO pull function. - * @vqmmc_enabled: Status of vqmmc, should be true or false. - * @irq_flags: The flags to be passed to request_irq. - * @irq: The irq value to be passed to request_irq. - * @sdio_id0: Number of slot0 in the SDIO interrupt registers. - * @cmd11_timer: Timer for SD3.0 voltage switch over scheme. - * @dto_timer: Timer for broken data transfer over scheme. - * - * Locking - * ======= - * - * @lock is a softirq-safe spinlock protecting @queue as well as - * @cur_slot, @mrq and @state. These must always be updated - * at the same time while holding @lock. - * - * @irq_lock is an irq-safe spinlock protecting the INTMASK register - * to allow the interrupt handler to modify it directly. Held for only long - * enough to read-modify-write INTMASK and no other locks are grabbed when - * holding this one. - * - * The @mrq field of struct dw_mci_slot is also protected by @lock, - * and must always be written at the same time as the slot is added to - * @queue. - * - * @pending_events and @completed_events are accessed using atomic bit - * operations, so they don't need any locking. - * - * None of the fields touched by the interrupt handler need any - * locking. However, ordering is important: Before EVENT_DATA_ERROR or - * EVENT_DATA_COMPLETE is set in @pending_events, all data-related - * interrupts must be disabled and @data_status updated with a - * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the - * CMDRDY interrupt must be disabled and @cmd_status updated with a - * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the - * bytes_xfered field of @data must be written. This is ensured by - * using barriers. - */ -struct dw_mci { - spinlock_t lock; - spinlock_t irq_lock; - void __iomem *regs; - void __iomem *fifo_reg; - - struct scatterlist *sg; - struct sg_mapping_iter sg_miter; - - struct dw_mci_slot *cur_slot; - struct mmc_request *mrq; - struct mmc_command *cmd; - struct mmc_data *data; - struct mmc_command stop_abort; - unsigned int prev_blksz; - unsigned char timing; - - /* DMA interface members*/ - int use_dma; - int using_dma; - int dma_64bit_address; - - dma_addr_t sg_dma; - void *sg_cpu; - const struct dw_mci_dma_ops *dma_ops; - /* For idmac */ - unsigned int ring_size; - - /* For edmac */ - struct dw_mci_dma_slave *dms; - /* Registers's physical base address */ - resource_size_t phy_regs; - - u32 cmd_status; - u32 data_status; - u32 stop_cmdr; - u32 dir_status; - struct tasklet_struct tasklet; - unsigned long pending_events; - unsigned long completed_events; - enum dw_mci_state state; - struct list_head queue; - - u32 bus_hz; - u32 current_speed; - u32 num_slots; - u32 fifoth_val; - u16 verid; - struct device *dev; - struct dw_mci_board *pdata; - const struct dw_mci_drv_data *drv_data; - void *priv; - struct clk *biu_clk; - struct clk *ciu_clk; - struct dw_mci_slot *slot[MAX_MCI_SLOTS]; - - /* FIFO push and pull */ - int fifo_depth; - int data_shift; - u8 part_buf_start; - u8 part_buf_count; - union { - u16 part_buf16; - u32 part_buf32; - u64 part_buf; - }; - void (*push_data)(struct dw_mci *host, void *buf, int cnt); - void (*pull_data)(struct dw_mci *host, void *buf, int cnt); - - bool vqmmc_enabled; - unsigned long irq_flags; /* IRQ flags */ - int irq; - - int sdio_id0; - - struct timer_list cmd11_timer; - struct timer_list dto_timer; -}; - -/* DMA ops for Internal/External DMAC interface */ -struct dw_mci_dma_ops { - /* DMA Ops */ - int (*init)(struct dw_mci *host); - int (*start)(struct dw_mci *host, unsigned int sg_len); - void (*complete)(void *host); - void (*stop)(struct dw_mci *host); - void (*cleanup)(struct dw_mci *host); - void (*exit)(struct dw_mci *host); -}; - -struct dma_pdata; - -/* Board platform data */ -struct dw_mci_board { - u32 num_slots; - - unsigned int bus_hz; /* Clock speed at the cclk_in pad */ - - u32 caps; /* Capabilities */ - u32 caps2; /* More capabilities */ - u32 pm_caps; /* PM capabilities */ - /* - * Override fifo depth. If 0, autodetect it from the FIFOTH register, - * but note that this may not be reliable after a bootloader has used - * it. - */ - unsigned int fifo_depth; - - /* delay in mS before detecting cards after interrupt */ - u32 detect_delay_ms; - - struct reset_control *rstc; - struct dw_mci_dma_ops *dma_ops; - struct dma_pdata *data; -}; - -#endif /* LINUX_MMC_DW_MMC_H */ -- cgit v1.2.3 From 8336bf68b57e1e736be8d4ef1f46a789fe7b9bde Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 11 Jan 2017 12:28:15 +0100 Subject: mmc: mxcmmc: Include interrupt.h in the platform data header The mxcmmc platform data header depends on interrupt.h. Don't rely on the public mmc header host.h to include it, bud instead make that dependency explicit. Signed-off-by: Ulf Hansson --- include/linux/platform_data/mmc-mxcmmc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-mxcmmc.h b/include/linux/platform_data/mmc-mxcmmc.h index 29115f405af9..b0fdaa9bd185 100644 --- a/include/linux/platform_data/mmc-mxcmmc.h +++ b/include/linux/platform_data/mmc-mxcmmc.h @@ -1,6 +1,7 @@ #ifndef ASMARM_ARCH_MMC_H #define ASMARM_ARCH_MMC_H +#include #include struct device; -- cgit v1.2.3 From 8da007348bf52a91e5137d27d7dcd528edbb80ce Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:06 +0100 Subject: mmc: core: First step in cleaning up public mmc header files This is the first step in cleaning up the public mmc header files. In this change we makes sure each header file builds standalone, as that helps to resolve dependencies. While changing this, it also seems reasonable to stop including other headers from inside a header itself which it don't depend upon. Additionally, in some cases such dependencies are better resolved by forward declaring the needed struct. Signed-off-by: Ulf Hansson Reviewed-by: Shawn Lin --- include/linux/mmc/card.h | 2 -- include/linux/mmc/core.h | 3 +-- include/linux/mmc/host.h | 9 ++------- include/linux/mmc/mmc.h | 2 ++ include/linux/mmc/slot-gpio.h | 3 +++ 5 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 00449e51838a..ca64f5b67983 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -11,7 +11,6 @@ #define LINUX_MMC_CARD_H #include -#include #include struct mmc_cid { @@ -206,7 +205,6 @@ struct sdio_cis { }; struct mmc_host; -struct mmc_ios; struct sdio_func; struct sdio_func_tuple; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index e33cc748dcfe..64e2ddf33f38 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -8,10 +8,9 @@ #ifndef LINUX_MMC_CORE_H #define LINUX_MMC_CORE_H -#include #include +#include -struct request; struct mmc_data; struct mmc_request; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8bc884121465..8d38c76b06d6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -10,16 +10,12 @@ #ifndef LINUX_MMC_HOST_H #define LINUX_MMC_HOST_H -#include -#include -#include #include #include #include #include #include -#include #include struct mmc_ios { @@ -82,6 +78,8 @@ struct mmc_ios { bool enhanced_strobe; /* hs400es selection */ }; +struct mmc_host; + struct mmc_host_ops { /* * It is optional for the host to implement pre_req and post_req in @@ -162,9 +160,6 @@ struct mmc_host_ops { unsigned int direction, int blk_size); }; -struct mmc_card; -struct device; - struct mmc_async_req { /* active mmc request */ struct mmc_request *mrq; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index a0740829ff00..261772e3effe 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -24,6 +24,8 @@ #ifndef LINUX_MMC_MMC_H #define LINUX_MMC_MMC_H +#include + /* Standard MMC commands (4.1) type argument response */ /* class 1 */ #define MMC_GO_IDLE_STATE 0 /* bc */ diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index a7972cd3bc14..82f0d289f110 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -11,6 +11,9 @@ #ifndef MMC_SLOT_GPIO_H #define MMC_SLOT_GPIO_H +#include +#include + struct mmc_host; int mmc_gpio_get_ro(struct mmc_host *host); -- cgit v1.2.3 From 55244c5659b5e73a969b285a2e763223d8aab979 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:08 +0100 Subject: mmc: core: Move public functions from core.h to private headers A significant amount of functions are available through the public mmc core.h header file. Let's slim down this public mmc interface, as to prevent users from abusing it, by moving some of the functions to private mmc header files. This change concentrates on moving the functions into private mmc headers, following changes may continue with additional clean-ups, as an example some functions can be turned into static. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/core.h | 50 ------------------------------------------------ 1 file changed, 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 64e2ddf33f38..e679a8678594 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -158,25 +158,13 @@ struct mmc_request { struct mmc_card; struct mmc_async_req; -extern int mmc_stop_bkops(struct mmc_card *); -extern int mmc_read_bkops_status(struct mmc_card *); extern struct mmc_async_req *mmc_start_req(struct mmc_host *, struct mmc_async_req *, enum mmc_blk_status *); -extern int mmc_interrupt_hpi(struct mmc_card *); extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); -extern void mmc_wait_for_req_done(struct mmc_host *host, - struct mmc_request *mrq); -extern bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); -extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); -extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, - struct mmc_command *, int); -extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); -extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode); -extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #define MMC_ERASE_ARG 0x00000000 #define MMC_SECURE_ERASE_ARG 0x80000000 @@ -188,46 +176,8 @@ extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #define MMC_SECURE_ARGS 0x80000000 #define MMC_TRIM_ARGS 0x00008001 -extern int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, - unsigned int arg); -extern int mmc_can_erase(struct mmc_card *card); -extern int mmc_can_trim(struct mmc_card *card); -extern int mmc_can_discard(struct mmc_card *card); -extern int mmc_can_sanitize(struct mmc_card *card); -extern int mmc_can_secure_erase_trim(struct mmc_card *card); -extern int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from, - unsigned int nr); -extern unsigned int mmc_calc_max_discard(struct mmc_card *card); - -extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen); -extern int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount, - bool is_rel_write); extern int mmc_hw_reset(struct mmc_host *host); -extern int mmc_can_reset(struct mmc_card *card); - extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); -extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int); - -extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort); -extern void mmc_release_host(struct mmc_host *host); - -extern void mmc_get_card(struct mmc_card *card); -extern void mmc_put_card(struct mmc_card *card); - -extern int mmc_flush_cache(struct mmc_card *); - -extern int mmc_detect_card_removed(struct mmc_host *host); - -/** - * mmc_claim_host - exclusively claim a host - * @host: mmc host to claim - * - * Claim a host for a set of operations. - */ -static inline void mmc_claim_host(struct mmc_host *host) -{ - __mmc_claim_host(host, NULL); -} struct device_node; extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); -- cgit v1.2.3 From 9e1bbc72727506c173cc6ed1d704cb2d7911d9d5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:09 +0100 Subject: mmc: core: Move some host specific public functions to host.h Ideally the public mmc header file, core.h, shouldn't contain interfaces particularly intended to be used by host drivers. Instead those should remain in the host.h header file. Therefore, let's move a couple functions from core.h to host.h. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/core.h | 6 ------ include/linux/mmc/host.h | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index e679a8678594..faacc90f8711 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -163,8 +163,6 @@ extern struct mmc_async_req *mmc_start_req(struct mmc_host *, enum mmc_blk_status *); extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); -extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); -extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode); #define MMC_ERASE_ARG 0x00000000 #define MMC_SECURE_ERASE_ARG 0x80000000 @@ -179,8 +177,4 @@ extern int mmc_abort_tuning(struct mmc_host *host, u32 opcode); extern int mmc_hw_reset(struct mmc_host *host); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); -struct device_node; -extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); -extern int mmc_of_parse_voltage(struct device_node *np, u32 *mask); - #endif /* LINUX_MMC_CORE_H */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8d38c76b06d6..7de05195bff6 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -392,11 +392,14 @@ struct mmc_host { unsigned long private[0] ____cacheline_aligned; }; +struct device_node; + struct mmc_host *mmc_alloc_host(int extra, struct device *); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); int mmc_of_parse(struct mmc_host *host); +int mmc_of_parse_voltage(struct device_node *np, u32 *mask); static inline void *mmc_priv(struct mmc_host *host) { @@ -452,6 +455,7 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, } #endif +u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max); int mmc_regulator_get_supply(struct mmc_host *mmc); static inline int mmc_card_is_removable(struct mmc_host *host) @@ -538,6 +542,8 @@ static inline bool mmc_can_retune(struct mmc_host *host) return host->can_retune == 1; } +int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); +int mmc_abort_tuning(struct mmc_host *host, u32 opcode); void mmc_retune_pause(struct mmc_host *host); void mmc_retune_unpause(struct mmc_host *host); -- cgit v1.2.3 From c0a3e080f9294117a7d510a4f01a0b7c6dbcadae Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:10 +0100 Subject: mmc: core: Move erase/trim/discard defines from public core.h to mmc.h As the public mmc.h header already contains similar defines for other mmc commands and arguments, let's move those for erase/trim/discard into here as well. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/core.h | 10 ---------- include/linux/mmc/mmc.h | 13 ++++++++++++- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index faacc90f8711..6440e10c86cd 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -164,16 +164,6 @@ extern struct mmc_async_req *mmc_start_req(struct mmc_host *, extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); -#define MMC_ERASE_ARG 0x00000000 -#define MMC_SECURE_ERASE_ARG 0x80000000 -#define MMC_TRIM_ARG 0x00000001 -#define MMC_DISCARD_ARG 0x00000003 -#define MMC_SECURE_TRIM1_ARG 0x80000001 -#define MMC_SECURE_TRIM2_ARG 0x80008000 - -#define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 - extern int mmc_hw_reset(struct mmc_host *host); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 261772e3effe..8f7854324d2b 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -462,12 +462,23 @@ struct _mmc_csd { /* * MMC_SWITCH access modes */ - #define MMC_SWITCH_MODE_CMD_SET 0x00 /* Change the command set */ #define MMC_SWITCH_MODE_SET_BITS 0x01 /* Set bits which are 1 in value */ #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */ #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */ +/* + * Erase/trim/discard + */ +#define MMC_ERASE_ARG 0x00000000 +#define MMC_SECURE_ERASE_ARG 0x80000000 +#define MMC_TRIM_ARG 0x00000001 +#define MMC_DISCARD_ARG 0x00000003 +#define MMC_SECURE_TRIM1_ARG 0x80000001 +#define MMC_SECURE_TRIM2_ARG 0x80008000 +#define MMC_SECURE_ARGS 0x80000000 +#define MMC_TRIM_ARGS 0x00008001 + #define mmc_driver_type_mask(n) (1 << (n)) #endif /* LINUX_MMC_MMC_H */ -- cgit v1.2.3 From 009b0fcea5dfe77663f7ec3927c7c50a97a7675c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:11 +0100 Subject: mmc: core: Remove unused struct _mmc_csd from public mmc.h header The struct _mmc_csd isn't being used and has been lurking around for a while. Let's kill it. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/mmc.h | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 8f7854324d2b..7406d9badda0 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -184,50 +184,6 @@ static inline bool mmc_op_multi(u32 opcode) #define R2_SPI_OUT_OF_RANGE (1 << 15) /* or CSD overwrite */ #define R2_SPI_CSD_OVERWRITE R2_SPI_OUT_OF_RANGE -/* These are unpacked versions of the actual responses */ - -struct _mmc_csd { - u8 csd_structure; - u8 spec_vers; - u8 taac; - u8 nsac; - u8 tran_speed; - u16 ccc; - u8 read_bl_len; - u8 read_bl_partial; - u8 write_blk_misalign; - u8 read_blk_misalign; - u8 dsr_imp; - u16 c_size; - u8 vdd_r_curr_min; - u8 vdd_r_curr_max; - u8 vdd_w_curr_min; - u8 vdd_w_curr_max; - u8 c_size_mult; - union { - struct { /* MMC system specification version 3.1 */ - u8 erase_grp_size; - u8 erase_grp_mult; - } v31; - struct { /* MMC system specification version 2.2 */ - u8 sector_size; - u8 erase_grp_size; - } v22; - } erase; - u8 wp_grp_size; - u8 wp_grp_enable; - u8 default_ecc; - u8 r2w_factor; - u8 write_bl_len; - u8 write_bl_partial; - u8 file_format_grp; - u8 copy; - u8 perm_write_protect; - u8 tmp_write_protect; - u8 file_format; - u8 ecc; -}; - /* * OCR bits are mostly in host.h */ -- cgit v1.2.3 From 4facdde11394d44b3869807841042d059f074a07 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:14 +0100 Subject: mmc: core: Move public functions from card.h to private headers A significant amount of functions and other definitions are available through the public mmc card.h header file. Let's slim down this public mmc interface, as to prevent users from abusing it, by moving some of the functions/definitions to private mmc header files. This change concentrates on moving the functions into private mmc headers, following changes may continue with additional clean-ups. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/card.h | 240 ----------------------------------------------- 1 file changed, 240 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index ca64f5b67983..29d00c91c25c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -248,13 +248,6 @@ struct mmc_card { #define MMC_TYPE_SDIO 2 /* SDIO card */ #define MMC_TYPE_SD_COMBO 3 /* SD combo (IO+mem) card */ unsigned int state; /* (our) card state */ -#define MMC_STATE_PRESENT (1<<0) /* present in sysfs */ -#define MMC_STATE_READONLY (1<<1) /* card is read-only */ -#define MMC_STATE_BLOCKADDR (1<<2) /* card uses block-addressing */ -#define MMC_CARD_SDXC (1<<3) /* card is SDXC */ -#define MMC_CARD_REMOVED (1<<4) /* card has been removed */ -#define MMC_STATE_DOING_BKOPS (1<<5) /* card is doing BKOPS */ -#define MMC_STATE_SUSPENDED (1<<6) /* card is suspended */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ @@ -273,7 +266,6 @@ struct mmc_card { #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ - unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ unsigned int pref_erase; /* in sectors */ @@ -309,245 +301,13 @@ struct mmc_card { unsigned int nr_parts; }; -/* - * This function fill contents in mmc_part. - */ -static inline void mmc_part_add(struct mmc_card *card, unsigned int size, - unsigned int part_cfg, char *name, int idx, bool ro, - int area_type) -{ - card->part[card->nr_parts].size = size; - card->part[card->nr_parts].part_cfg = part_cfg; - sprintf(card->part[card->nr_parts].name, name, idx); - card->part[card->nr_parts].force_ro = ro; - card->part[card->nr_parts].area_type = area_type; - card->nr_parts++; -} - static inline bool mmc_large_sector(struct mmc_card *card) { return card->ext_csd.data_sector_size == 4096; } -/* - * The world is not perfect and supplies us with broken mmc/sdio devices. - * For at least some of these bugs we need a work-around. - */ - -struct mmc_fixup { - /* CID-specific fields. */ - const char *name; - - /* Valid revision range */ - u64 rev_start, rev_end; - - unsigned int manfid; - unsigned short oemid; - - /* SDIO-specfic fields. You can use SDIO_ANY_ID here of course */ - u16 cis_vendor, cis_device; - - /* for MMC cards */ - unsigned int ext_csd_rev; - - void (*vendor_fixup)(struct mmc_card *card, int data); - int data; -}; - -#define CID_MANFID_ANY (-1u) -#define CID_OEMID_ANY ((unsigned short) -1) -#define CID_NAME_ANY (NULL) - -#define EXT_CSD_REV_ANY (-1u) - -#define CID_MANFID_SANDISK 0x2 -#define CID_MANFID_TOSHIBA 0x11 -#define CID_MANFID_MICRON 0x13 -#define CID_MANFID_SAMSUNG 0x15 -#define CID_MANFID_KINGSTON 0x70 -#define CID_MANFID_HYNIX 0x90 - -#define END_FIXUP { NULL } - -#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end, \ - _cis_vendor, _cis_device, \ - _fixup, _data, _ext_csd_rev) \ - { \ - .name = (_name), \ - .manfid = (_manfid), \ - .oemid = (_oemid), \ - .rev_start = (_rev_start), \ - .rev_end = (_rev_end), \ - .cis_vendor = (_cis_vendor), \ - .cis_device = (_cis_device), \ - .vendor_fixup = (_fixup), \ - .data = (_data), \ - .ext_csd_rev = (_ext_csd_rev), \ - } - -#define MMC_FIXUP_REV(_name, _manfid, _oemid, _rev_start, _rev_end, \ - _fixup, _data, _ext_csd_rev) \ - _FIXUP_EXT(_name, _manfid, \ - _oemid, _rev_start, _rev_end, \ - SDIO_ANY_ID, SDIO_ANY_ID, \ - _fixup, _data, _ext_csd_rev) \ - -#define MMC_FIXUP(_name, _manfid, _oemid, _fixup, _data) \ - MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data, \ - EXT_CSD_REV_ANY) - -#define MMC_FIXUP_EXT_CSD_REV(_name, _manfid, _oemid, _fixup, _data, \ - _ext_csd_rev) \ - MMC_FIXUP_REV(_name, _manfid, _oemid, 0, -1ull, _fixup, _data, \ - _ext_csd_rev) - -#define SDIO_FIXUP(_vendor, _device, _fixup, _data) \ - _FIXUP_EXT(CID_NAME_ANY, CID_MANFID_ANY, \ - CID_OEMID_ANY, 0, -1ull, \ - _vendor, _device, \ - _fixup, _data, EXT_CSD_REV_ANY) \ - -#define cid_rev(hwrev, fwrev, year, month) \ - (((u64) hwrev) << 40 | \ - ((u64) fwrev) << 32 | \ - ((u64) year) << 16 | \ - ((u64) month)) - -#define cid_rev_card(card) \ - cid_rev(card->cid.hwrev, \ - card->cid.fwrev, \ - card->cid.year, \ - card->cid.month) - -/* - * Unconditionally quirk add/remove. - */ - -static inline void __maybe_unused add_quirk(struct mmc_card *card, int data) -{ - card->quirks |= data; -} - -static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data) -{ - card->quirks &= ~data; -} - #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) #define mmc_card_sdio(c) ((c)->type == MMC_TYPE_SDIO) -#define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) -#define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) -#define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) -#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC) -#define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED)) -#define mmc_card_doing_bkops(c) ((c)->state & MMC_STATE_DOING_BKOPS) -#define mmc_card_suspended(c) ((c)->state & MMC_STATE_SUSPENDED) - -#define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) -#define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) -#define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) -#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC) -#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED) -#define mmc_card_set_doing_bkops(c) ((c)->state |= MMC_STATE_DOING_BKOPS) -#define mmc_card_clr_doing_bkops(c) ((c)->state &= ~MMC_STATE_DOING_BKOPS) -#define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED) -#define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED) - -/* - * Quirk add/remove for MMC products. - */ - -static inline void __maybe_unused add_quirk_mmc(struct mmc_card *card, int data) -{ - if (mmc_card_mmc(card)) - card->quirks |= data; -} - -static inline void __maybe_unused remove_quirk_mmc(struct mmc_card *card, - int data) -{ - if (mmc_card_mmc(card)) - card->quirks &= ~data; -} - -/* - * Quirk add/remove for SD products. - */ - -static inline void __maybe_unused add_quirk_sd(struct mmc_card *card, int data) -{ - if (mmc_card_sd(card)) - card->quirks |= data; -} - -static inline void __maybe_unused remove_quirk_sd(struct mmc_card *card, - int data) -{ - if (mmc_card_sd(card)) - card->quirks &= ~data; -} - -static inline int mmc_card_lenient_fn0(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_LENIENT_FN0; -} - -static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; -} - -static inline int mmc_card_disable_cd(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_DISABLE_CD; -} - -static inline int mmc_card_nonstd_func_interface(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_NONSTD_FUNC_IF; -} - -static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512; -} - -static inline int mmc_card_long_read_time(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_LONG_READ_TIME; -} - -static inline int mmc_card_broken_irq_polling(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_BROKEN_IRQ_POLLING; -} - -static inline int mmc_card_broken_hpi(const struct mmc_card *c) -{ - return c->quirks & MMC_QUIRK_BROKEN_HPI; -} - -#define mmc_card_name(c) ((c)->cid.prod_name) -#define mmc_card_id(c) (dev_name(&(c)->dev)) - -#define mmc_dev_to_card(d) container_of(d, struct mmc_card, dev) - -/* - * MMC device driver (e.g., Flash card, I/O card...) - */ -struct mmc_driver { - struct device_driver drv; - int (*probe)(struct mmc_card *); - void (*remove)(struct mmc_card *); - void (*shutdown)(struct mmc_card *); -}; - -extern int mmc_register_driver(struct mmc_driver *); -extern void mmc_unregister_driver(struct mmc_driver *); - -extern void mmc_fixup_device(struct mmc_card *card, - const struct mmc_fixup *table); - #endif /* LINUX_MMC_CARD_H */ -- cgit v1.2.3 From 5857b29b96dcf208e4903ec6f20d132e6a77cac2 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:15 +0100 Subject: mmc: core: Move public functions from host.h to private headers A significant amount of functions are available through the public mmc host.h header file. Let's slim down this public mmc interface, as to prevent users from abusing it, by moving some of the functions to private mmc host.h header file. This change concentrates on moving the functions into private mmc headers, following changes may continue with additional clean-ups. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/host.h | 48 ++---------------------------------------------- 1 file changed, 2 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7de05195bff6..97699d55b2ae 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -473,56 +473,20 @@ static inline int mmc_card_wake_sdio_irq(struct mmc_host *host) return host->pm_flags & MMC_PM_WAKE_SDIO_IRQ; } -static inline int mmc_host_cmd23(struct mmc_host *host) -{ - return host->caps & MMC_CAP_CMD23; -} - -static inline int mmc_boot_partition_access(struct mmc_host *host) -{ - return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC); -} - -static inline int mmc_host_uhs(struct mmc_host *host) -{ - return host->caps & - (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | - MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | - MMC_CAP_UHS_DDR50); -} - +/* TODO: Move to private header */ static inline int mmc_card_hs(struct mmc_card *card) { return card->host->ios.timing == MMC_TIMING_SD_HS || card->host->ios.timing == MMC_TIMING_MMC_HS; } +/* TODO: Move to private header */ static inline int mmc_card_uhs(struct mmc_card *card) { return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 && card->host->ios.timing <= MMC_TIMING_UHS_DDR50; } -static inline bool mmc_card_hs200(struct mmc_card *card) -{ - return card->host->ios.timing == MMC_TIMING_MMC_HS200; -} - -static inline bool mmc_card_ddr52(struct mmc_card *card) -{ - return card->host->ios.timing == MMC_TIMING_MMC_DDR52; -} - -static inline bool mmc_card_hs400(struct mmc_card *card) -{ - return card->host->ios.timing == MMC_TIMING_MMC_HS400; -} - -static inline bool mmc_card_hs400es(struct mmc_card *card) -{ - return card->host->ios.enhanced_strobe; -} - void mmc_retune_timer_stop(struct mmc_host *host); static inline void mmc_retune_needed(struct mmc_host *host) @@ -531,12 +495,6 @@ static inline void mmc_retune_needed(struct mmc_host *host) host->need_retune = 1; } -static inline void mmc_retune_recheck(struct mmc_host *host) -{ - if (host->hold_retune <= 1) - host->retune_now = 1; -} - static inline bool mmc_can_retune(struct mmc_host *host) { return host->can_retune == 1; @@ -544,7 +502,5 @@ static inline bool mmc_can_retune(struct mmc_host *host) int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_abort_tuning(struct mmc_host *host, u32 opcode); -void mmc_retune_pause(struct mmc_host *host); -void mmc_retune_unpause(struct mmc_host *host); #endif /* LINUX_MMC_HOST_H */ -- cgit v1.2.3 From 23888bfe9bd730d42e326cc5f6f1188beb948f97 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Jan 2017 14:14:16 +0100 Subject: mmc: core: Don't use extern declarations of public mmc functions Using extern when declaring functions in the public header, core.h, is redundant. Let's just remove the use of it. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Reviewed-by: Shawn Lin --- include/linux/mmc/core.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 6440e10c86cd..6dcb339fcd45 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -158,13 +158,14 @@ struct mmc_request { struct mmc_card; struct mmc_async_req; -extern struct mmc_async_req *mmc_start_req(struct mmc_host *, - struct mmc_async_req *, - enum mmc_blk_status *); -extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *); -extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); - -extern int mmc_hw_reset(struct mmc_host *host); -extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); +struct mmc_async_req *mmc_start_req(struct mmc_host *host, + struct mmc_async_req *areq, + enum mmc_blk_status *ret_stat); +void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq); +int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, + int retries); + +int mmc_hw_reset(struct mmc_host *host); +void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card); #endif /* LINUX_MMC_CORE_H */ -- cgit v1.2.3 From 20dd03734cac41a0545dd24f5e81d8ff0c80874b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 19 Jan 2017 21:07:17 +0100 Subject: mmc: host: tmio: SDIO_STATUS_QUIRK is rather SDIO_STATUS_SETBITS QUIRK sounds like there is something wrong, but actually there are just some bits which need to be 1. Rename it to be more clear. Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index fba44abd05ba..a1520d88ebf3 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -94,10 +94,8 @@ */ #define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) -/* - * Some controllers needs to set 1 on SDIO status reserved bits - */ -#define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) +/* Controller has some SDIO status bits which must be 1 */ +#define TMIO_MMC_SDIO_STATUS_SETBITS (1 << 8) /* * Some controllers have a 32-bit wide data port register -- cgit v1.2.3 From 20f921bb01140207983e9fc3af4bb60f1f9fc0d5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 25 Jan 2017 12:04:17 +0100 Subject: mmc: core: Invent MMC_CAP_3_3V_DDR According the JEDEC specification an eMMC card supporting 1.8V vccq in DDR mode should also be capable of 3.3V. However, it's been reported that some mmc hosts supports 3.3V, but not 1.8V. Currently the mmc core implements an error handling when the host fails to set 1.8V for vccq, by falling back to 3.3V. Unfortunate, this seems to be insufficient for some mmc hosts. To enable these to use eMMC DDR mode let's invent a new mmc cap, MMC_CAP_3_3V_DDR, which tells whether they support the eMMC 3.3V DDR mode. In case MMC_CAP_3_3V_DDR is set, but not MMC_CAP_1_8V_DDR, let's change to remain on the 3.3V, as it's the default voltage level for vccq, set by the earlier power up sequence. As this change introduces MMC_CAP_3_3V_DDR, let's take the opportunity to do some re-formatting of the related defines in the header file. Signed-off-by: Ulf Hansson Reviewed-by: Shawn Lin Tested-by: Jan Glauber Tested-by: Stefan Wahren --- include/linux/mmc/host.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 97699d55b2ae..83f1c4a9f03b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -259,17 +259,16 @@ struct mmc_host { #define MMC_CAP_NONREMOVABLE (1 << 8) /* Nonremovable e.g. eMMC */ #define MMC_CAP_WAIT_WHILE_BUSY (1 << 9) /* Waits while card is busy */ #define MMC_CAP_ERASE (1 << 10) /* Allow erase/trim commands */ -#define MMC_CAP_1_8V_DDR (1 << 11) /* can support */ - /* DDR mode at 1.8V */ -#define MMC_CAP_1_2V_DDR (1 << 12) /* can support */ - /* DDR mode at 1.2V */ -#define MMC_CAP_POWER_OFF_CARD (1 << 13) /* Can power off after boot */ -#define MMC_CAP_BUS_WIDTH_TEST (1 << 14) /* CMD14/CMD19 bus width ok */ -#define MMC_CAP_UHS_SDR12 (1 << 15) /* Host supports UHS SDR12 mode */ -#define MMC_CAP_UHS_SDR25 (1 << 16) /* Host supports UHS SDR25 mode */ -#define MMC_CAP_UHS_SDR50 (1 << 17) /* Host supports UHS SDR50 mode */ -#define MMC_CAP_UHS_SDR104 (1 << 18) /* Host supports UHS SDR104 mode */ -#define MMC_CAP_UHS_DDR50 (1 << 19) /* Host supports UHS DDR50 mode */ +#define MMC_CAP_3_3V_DDR (1 << 11) /* Host supports eMMC DDR 3.3V */ +#define MMC_CAP_1_8V_DDR (1 << 12) /* Host supports eMMC DDR 1.8V */ +#define MMC_CAP_1_2V_DDR (1 << 13) /* Host supports eMMC DDR 1.2V */ +#define MMC_CAP_POWER_OFF_CARD (1 << 14) /* Can power off after boot */ +#define MMC_CAP_BUS_WIDTH_TEST (1 << 15) /* CMD14/CMD19 bus width ok */ +#define MMC_CAP_UHS_SDR12 (1 << 16) /* Host supports UHS SDR12 mode */ +#define MMC_CAP_UHS_SDR25 (1 << 17) /* Host supports UHS SDR25 mode */ +#define MMC_CAP_UHS_SDR50 (1 << 18) /* Host supports UHS SDR50 mode */ +#define MMC_CAP_UHS_SDR104 (1 << 19) /* Host supports UHS SDR104 mode */ +#define MMC_CAP_UHS_DDR50 (1 << 20) /* Host supports UHS DDR50 mode */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ -- cgit v1.2.3 From c3399ef55d8e8295293808eba32e3f7056526324 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 1 Feb 2017 13:47:53 +0100 Subject: mmc: core: rename mmc_start_req() to *areq() With the coexisting __mmc_start_request(), mmc_start_request() and __mmc_start_req() it is a bit confusing that mmc_start_req() actually does not start a normal request, but an asynchronous request. Rename it to mmc_start_areq() to make it explicit what the function is doing, also fix the kerneldoc for this function while we're at it. Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 6dcb339fcd45..a0c63ea28796 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -158,7 +158,7 @@ struct mmc_request { struct mmc_card; struct mmc_async_req; -struct mmc_async_req *mmc_start_req(struct mmc_host *host, +struct mmc_async_req *mmc_start_areq(struct mmc_host *host, struct mmc_async_req *areq, enum mmc_blk_status *ret_stat); void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq); -- cgit v1.2.3 From 30b888ba950d9f77326b50a4aa2d7d99557d5718 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 28 Jan 2017 09:55:20 -0500 Subject: radix-tree: Add radix_tree_iter_tag_clear() The counterpart to radix_tree_iter_tag_set(), used by the IDR code Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- include/linux/radix-tree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 32683c7c2e0d..8bf4ef448ce1 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -332,7 +332,9 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); -void radix_tree_iter_tag_set(struct radix_tree_root *root, +void radix_tree_iter_tag_set(struct radix_tree_root *, + const struct radix_tree_iter *iter, unsigned int tag); +void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, -- cgit v1.2.3 From 0ac398ef391b53122976325ab6953456ce8e8310 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 28 Jan 2017 09:56:22 -0500 Subject: radix-tree: Add radix_tree_iter_delete Factor the deletion code out into __radix_tree_delete() and provide a nice iterator-based wrapper around it. If we free the node, advance the iterator to avoid reading from freed memory. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 8bf4ef448ce1..05f715cb8062 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -311,6 +311,8 @@ void __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node, radix_tree_update_node_t update_node, void *private); +void radix_tree_iter_delete(struct radix_tree_root *, + struct radix_tree_iter *iter, void **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, -- cgit v1.2.3 From ca86cad7380e373fa17bc0ee8aff121380323e69 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Sat, 4 Feb 2017 13:10:38 -0500 Subject: audit: log module name on init_module This adds a new auxiliary record MODULE_INIT to the SYSCALL event. We get finit_module for free since it made most sense to hook this in to load_module(). https://github.com/linux-audit/audit-kernel/issues/7 https://github.com/linux-audit/audit-kernel/wiki/RFE-Module-Load-Record-Format Signed-off-by: Richard Guy Briggs Acked-by: Jessica Yu [PM: corrected links in the commit description] Signed-off-by: Paul Moore --- include/linux/audit.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2be99b276d29..aba3a2684300 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -360,6 +360,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *old); extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); +extern void __audit_log_kern_module(char *name); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -450,6 +451,12 @@ static inline void audit_mmap_fd(int fd, int flags) __audit_mmap_fd(fd, flags); } +static inline void audit_log_kern_module(char *name) +{ + if (!audit_dummy_context()) + __audit_log_kern_module(name); +} + extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ @@ -561,6 +568,11 @@ static inline void audit_log_capset(const struct cred *new, { } static inline void audit_mmap_fd(int fd, int flags) { } + +static inline void audit_log_kern_module(char *name) +{ +} + static inline void audit_ptrace(struct task_struct *t) { } #define audit_n_rules 0 -- cgit v1.2.3 From 251af29c320d86071664f02c76f0d063a19fefdf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Feb 2017 10:37:38 -0500 Subject: nlm: Ensure callback code also checks that the files match It is not sufficient to just check that the lock pids match when granting a callback, we also need to ensure that we're granting the callback on the right file. Reported-by: Pankaj Singh Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c15373894a42..b37dee3acaba 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -355,7 +355,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return fl1->fl_pid == fl2->fl_pid + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end -- cgit v1.2.3 From 0a835c4f090af2c76fc2932c539c3b32fd21fbbb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 20 Dec 2016 10:27:56 -0500 Subject: Reimplement IDR and IDA using the radix tree The IDR is very similar to the radix tree. It has some functionality that the radix tree did not have (alloc next free, cyclic allocation, a callback-based for_each, destroy tree), which is readily implementable on top of the radix tree. A few small changes were needed in order to use a tag to represent nodes with free space below them. More extensive changes were needed to support storing NULL as a valid entry in an IDR. Plain radix trees still interpret NULL as a not-present entry. The IDA is reimplemented as a client of the newly enhanced radix tree. As in the current implementation, it uses a bitmap at the last level of the tree. Signed-off-by: Matthew Wilcox Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/idr.h | 145 ++++++++++++++++++++------------------------- include/linux/radix-tree.h | 49 +++++++++++++-- 2 files changed, 110 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3c01b89aed67..f58c0a3addc3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -12,47 +12,28 @@ #ifndef __IDR_H__ #define __IDR_H__ -#include -#include -#include -#include +#include +#include + +struct idr { + struct radix_tree_root idr_rt; + unsigned int idr_next; +}; /* - * Using 6 bits at each layer allows us to allocate 7 layers out of each page. - * 8 bits only gave us 3 layers out of every pair of pages, which is less - * efficient except for trees with a largest element between 192-255 inclusive. + * The IDR API does not expose the tagging functionality of the radix tree + * to users. Use tag 0 to track whether a node has free space below it. */ -#define IDR_BITS 6 -#define IDR_SIZE (1 << IDR_BITS) -#define IDR_MASK ((1 << IDR_BITS)-1) - -struct idr_layer { - int prefix; /* the ID prefix of this idr_layer */ - int layer; /* distance from leaf */ - struct idr_layer __rcu *ary[1<cur); + return READ_ONCE(idr->idr_next); } /** @@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr) */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { - WRITE_ONCE(idr->cur, val); + WRITE_ONCE(idr->idr_next, val); } /** @@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) * period). */ -/* - * This is what we export. - */ - -void *idr_find_slowpath(struct idr *idp, int id); void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); -int idr_for_each(struct idr *idp, +int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); +int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); +int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); -void *idr_get_next(struct idr *idp, int *nextid); -void *idr_replace(struct idr *idp, void *ptr, int id); -void idr_remove(struct idr *idp, int id); -void idr_destroy(struct idr *idp); -void idr_init(struct idr *idp); -bool idr_is_empty(struct idr *idp); +void *idr_get_next(struct idr *, int *nextid); +void *idr_replace(struct idr *, void *, int id); +void idr_destroy(struct idr *); + +static inline void idr_remove(struct idr *idr, int id) +{ + radix_tree_delete(&idr->idr_rt, id); +} + +static inline void idr_init(struct idr *idr) +{ + INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); + idr->idr_next = 0; +} + +static inline bool idr_is_empty(const struct idr *idr) +{ + return radix_tree_empty(&idr->idr_rt) && + radix_tree_tagged(&idr->idr_rt, IDR_FREE); +} /** * idr_preload_end - end preload section started with idr_preload() @@ -137,19 +127,14 @@ static inline void idr_preload_end(void) * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. */ -static inline void *idr_find(struct idr *idr, int id) +static inline void *idr_find(const struct idr *idr, int id) { - struct idr_layer *hint = rcu_dereference_raw(idr->hint); - - if (hint && (id & ~IDR_MASK) == hint->prefix) - return rcu_dereference_raw(hint->ary[id & IDR_MASK]); - - return idr_find_slowpath(idr, id); + return radix_tree_lookup(&idr->idr_rt, id); } /** * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle + * @idr: idr handle * @entry: the type * to use as cursor * @id: id entry's key * @@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id) * after normal terminatinon @entry is left with the value NULL. This * is convenient for a "not found" value. */ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) +#define idr_for_each_entry(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) /** - * idr_for_each_entry - continue iteration over an idr's elements of a given type - * @idp: idr handle + * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type + * @idr: idr handle * @entry: the type * to use as cursor * @id: id entry's key * * Continue to iterate over list of given type, continuing after * the current position. */ -#define idr_for_each_entry_continue(idp, entry, id) \ - for ((entry) = idr_get_next((idp), &(id)); \ +#define idr_for_each_entry_continue(idr, entry, id) \ + for ((entry) = idr_get_next((idr), &(id)); \ entry; \ - ++id, (entry) = idr_get_next((idp), &(id))) + ++id, (entry) = idr_get_next((idr), &(id))) /* * IDA - IDR based id allocator, use when translation from id to * pointer isn't necessary. - * - * IDA_BITMAP_LONGS is calculated to be one less to accommodate - * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ -#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) +#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { - long nr_busy; unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { - struct idr idr; + struct radix_tree_root ida_rt; struct ida_bitmap *free_bitmap; }; -#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } -#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) +#define IDA_INIT { \ + .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \ +} +#define DEFINE_IDA(name) struct ida name = IDA_INIT int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); -void ida_init(struct ida *ida); int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); +static inline void ida_init(struct ida *ida) +{ + INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); + ida->free_bitmap = NULL; +} + /** * ida_get_new - allocate new ID * @ida: idr handle @@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id) return ida_get_new_above(ida, 0, p_id); } -static inline bool ida_is_empty(struct ida *ida) +static inline bool ida_is_empty(const struct ida *ida) { - return idr_is_empty(&ida->idr); + return radix_tree_empty(&ida->ida_rt); } - -void __init idr_init_cache(void); - #endif /* __IDR_H__ */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 05f715cb8062..2ba0c1f46c84 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -105,7 +105,10 @@ struct radix_tree_node { unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ +/* The top bits of gfp_mask are used to store the root tags and the IDR flag */ +#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT)) +#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1) + struct radix_tree_root { gfp_t gfp_mask; struct radix_tree_node __rcu *rnode; @@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); +void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, + gfp_t, int end); -#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ -#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ -#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ +enum { + RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ + RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */ + RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */ +}; /** * radix_tree_iter_init - initialize radix tree iterator @@ -402,6 +409,40 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_lookup - look up an index in the radix tree + * @root: radix tree root + * @iter: iterator state + * @index: key to look up + * + * If @index is present in the radix tree, this function returns the slot + * containing it and updates @iter to describe the entry. If @index is not + * present, it returns NULL. + */ +static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, + struct radix_tree_iter *iter, unsigned long index) +{ + radix_tree_iter_init(iter, index); + return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); +} + +/** + * radix_tree_iter_find - find a present entry + * @root: radix tree root + * @iter: iterator state + * @index: start location + * + * This function returns the slot containing the entry with the lowest index + * which is at least @index. If @index is larger than any present entry, this + * function returns NULL. The @iter is updated to describe the entry found. + */ +static inline void **radix_tree_iter_find(const struct radix_tree_root *root, + struct radix_tree_iter *iter, unsigned long index) +{ + radix_tree_iter_init(iter, index); + return radix_tree_next_chunk(root, iter, 0); +} + /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state -- cgit v1.2.3 From 7ad3d4d85c7af9632055a6ac0aa15b6b6a321c6b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 11:55:56 -0500 Subject: ida: Move ida_bitmap to a percpu variable When we preload the IDA, we allocate an IDA bitmap. Instead of storing that preallocated bitmap in the IDA, we store it in a percpu variable. Generally there are more IDAs in the system than CPUs, so this cuts down on the number of preallocated bitmaps that are unused, and about half of the IDA users did not call ida_destroy() so they were leaking IDA bitmaps. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index f58c0a3addc3..2027c7aba50d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -14,6 +14,7 @@ #include #include +#include struct idr { struct radix_tree_root idr_rt; @@ -171,9 +172,10 @@ struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; +DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap); + struct ida { struct radix_tree_root ida_rt; - struct ida_bitmap *free_bitmap; }; #define IDA_INIT { \ @@ -193,7 +195,6 @@ void ida_simple_remove(struct ida *ida, unsigned int id); static inline void ida_init(struct ida *ida) { INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); - ida->free_bitmap = NULL; } /** -- cgit v1.2.3 From d3e709e63e97e5f3f129b639991cfe266da60bae Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 22 Dec 2016 13:30:22 -0500 Subject: idr: Return the deleted entry from idr_remove It is a relatively common idiom (8 instances) to first look up an IDR entry, and then remove it from the tree if it is found, possibly doing further operations upon the entry afterwards. If we change idr_remove() to return the removed object, all of these users can save themselves a walk of the IDR tree. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 2027c7aba50d..bf70b3ef0a07 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -88,9 +88,9 @@ void *idr_get_next(struct idr *, int *nextid); void *idr_replace(struct idr *, void *, int id); void idr_destroy(struct idr *); -static inline void idr_remove(struct idr *idr, int id) +static inline void *idr_remove(struct idr *idr, int id) { - radix_tree_delete(&idr->idr_rt, id); + return radix_tree_delete_item(&idr->idr_rt, id, NULL); } static inline void idr_init(struct idr *idr) -- cgit v1.2.3 From d58275bc96ae933b1b3888af80920dd6b020c505 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Jan 2017 17:10:21 -0500 Subject: radix-tree: Store a pointer to the root in each node Instead of having this mysterious private_data in each radix_tree_node, store a pointer to the root, which can be useful for debugging. This also relieves the mm code from the duty of updating it. Acked-by: Johannes Weiner Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 2ba0c1f46c84..d250059bbfa4 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -96,7 +96,7 @@ struct radix_tree_node { unsigned char count; /* Total entry count */ unsigned char exceptional; /* Exceptional entry count */ struct radix_tree_node *parent; /* Used when ascending tree */ - void *private_data; /* For tree user */ + struct radix_tree_root *root; /* The tree we belong to */ union { struct list_head private_list; /* For tree user */ struct rcu_head rcu_head; /* Used when freeing node */ -- cgit v1.2.3 From 12320d0ff1c9d5582f5c35e4bb8b9c70c475fd71 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 13 Feb 2017 15:22:48 -0500 Subject: radix-tree: Add rcu_dereference and rcu_assign_pointer calls Some of these have been missing for many years. Others were recently introduced by me. Fortunately, we have tools that help us find such things. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index d250059bbfa4..0b502de7d23a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -561,7 +561,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; found: - if (unlikely(radix_tree_is_internal_node(*slot))) + if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot)))) return __radix_tree_next_slot(slot, iter, flags); return slot; } -- cgit v1.2.3 From d7b627277b57370223d682cede979a279284b12a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 13 Feb 2017 15:58:24 -0500 Subject: radix-tree: Fix __rcu annotations Many places were missing __rcu annotations. A few places needed a few lines of explanation about why it was safe to not use RCU accessors. Add a custom CFLAGS setting to the Makefile to ensure that new patches don't miss RCU annotations. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 110 ++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0b502de7d23a..3e5735064b71 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -221,10 +221,8 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter) */ /** - * radix_tree_deref_slot - dereference a slot - * @pslot: pointer to slot, returned by radix_tree_lookup_slot - * Returns: item that was stored in that slot with any direct pointer flag - * removed. + * radix_tree_deref_slot - dereference a slot + * @slot: slot pointer, returned by radix_tree_lookup_slot * * For use with radix_tree_lookup_slot(). Caller must hold tree at least read * locked across slot lookup and dereference. Not required if write lock is @@ -232,26 +230,27 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter) * * radix_tree_deref_retry must be used to confirm validity of the pointer if * only the read lock is held. + * + * Return: entry stored in that slot. */ -static inline void *radix_tree_deref_slot(void **pslot) +static inline void *radix_tree_deref_slot(void __rcu **slot) { - return rcu_dereference(*pslot); + return rcu_dereference(*slot); } /** - * radix_tree_deref_slot_protected - dereference a slot without RCU lock but with tree lock held - * @pslot: pointer to slot, returned by radix_tree_lookup_slot - * Returns: item that was stored in that slot with any direct pointer flag - * removed. - * - * Similar to radix_tree_deref_slot but only used during migration when a pages - * mapping is being moved. The caller does not hold the RCU read lock but it - * must hold the tree lock to prevent parallel updates. + * radix_tree_deref_slot_protected - dereference a slot with tree lock held + * @slot: slot pointer, returned by radix_tree_lookup_slot + * + * Similar to radix_tree_deref_slot. The caller does not hold the RCU read + * lock but it must hold the tree lock to prevent parallel updates. + * + * Return: entry stored in that slot. */ -static inline void *radix_tree_deref_slot_protected(void **pslot, +static inline void *radix_tree_deref_slot_protected(void __rcu **slot, spinlock_t *treelock) { - return rcu_dereference_protected(*pslot, lockdep_is_held(treelock)); + return rcu_dereference_protected(*slot, lockdep_is_held(treelock)); } /** @@ -287,9 +286,9 @@ static inline int radix_tree_exception(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } -int __radix_tree_create(struct radix_tree_root *root, unsigned long index, +int __radix_tree_create(struct radix_tree_root *, unsigned long index, unsigned order, struct radix_tree_node **nodep, - void ***slotp); + void __rcu ***slotp); int __radix_tree_insert(struct radix_tree_root *, unsigned long index, unsigned order, void *); static inline int radix_tree_insert(struct radix_tree_root *root, @@ -298,42 +297,41 @@ static inline int radix_tree_insert(struct radix_tree_root *root, return __radix_tree_insert(root, index, 0, entry); } void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, - struct radix_tree_node **nodep, void ***slotp); + struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long); +void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, + unsigned long index); typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); -void __radix_tree_replace(struct radix_tree_root *root, - struct radix_tree_node *node, - void **slot, void *item, +void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, + void __rcu **slot, void *entry, radix_tree_update_node_t update_node, void *private); void radix_tree_iter_replace(struct radix_tree_root *, - const struct radix_tree_iter *, void **slot, void *item); -void radix_tree_replace_slot(struct radix_tree_root *root, - void **slot, void *item); -void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, + const struct radix_tree_iter *, void __rcu **slot, void *entry); +void radix_tree_replace_slot(struct radix_tree_root *, + void __rcu **slot, void *entry); +void __radix_tree_delete_node(struct radix_tree_root *, + struct radix_tree_node *, radix_tree_update_node_t update_node, void *private); void radix_tree_iter_delete(struct radix_tree_root *, - struct radix_tree_iter *iter, void **slot); + struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -void radix_tree_clear_tags(struct radix_tree_root *root, - struct radix_tree_node *node, - void **slot); +void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *, + void __rcu **slot); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *, - void ***results, unsigned long *indices, + void __rcu ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); void radix_tree_init(void); -void *radix_tree_tag_set(struct radix_tree_root *root, +void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); -void *radix_tree_tag_clear(struct radix_tree_root *root, +void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); @@ -341,15 +339,13 @@ void radix_tree_iter_tag_set(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag); +unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, + void **results, unsigned long first_index, + unsigned int max_items, unsigned int tag); +unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, + void __rcu ***results, unsigned long first_index, + unsigned int max_items, unsigned int tag); +int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { @@ -361,7 +357,7 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, +void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, gfp_t, int end); enum { @@ -377,7 +373,7 @@ enum { * @start: iteration starting index * Returns: NULL */ -static __always_inline void ** +static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) { /* @@ -406,7 +402,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ -void **radix_tree_next_chunk(const struct radix_tree_root *, +void __rcu **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** @@ -419,7 +415,8 @@ void **radix_tree_next_chunk(const struct radix_tree_root *, * containing it and updates @iter to describe the entry. If @index is not * present, it returns NULL. */ -static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, +static inline void __rcu ** +radix_tree_iter_lookup(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); @@ -436,7 +433,8 @@ static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, * which is at least @index. If @index is larger than any present entry, this * function returns NULL. The @iter is updated to describe the entry found. */ -static inline void **radix_tree_iter_find(const struct radix_tree_root *root, +static inline void __rcu ** +radix_tree_iter_find(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); @@ -453,7 +451,7 @@ static inline void **radix_tree_iter_find(const struct radix_tree_root *root, * and continue the iteration. */ static inline __must_check -void **radix_tree_iter_retry(struct radix_tree_iter *iter) +void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; iter->tags = 0; @@ -476,7 +474,7 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) * have been invalidated by an insertion or deletion. Call this function * before releasing the lock to continue the iteration from the next index. */ -void **__must_check radix_tree_iter_resume(void **slot, +void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter); /** @@ -492,11 +490,11 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) } #ifdef CONFIG_RADIX_TREE_MULTIORDER -void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, - unsigned flags); +void __rcu **__radix_tree_next_slot(void __rcu **slot, + struct radix_tree_iter *iter, unsigned flags); #else /* Can't happen without sibling entries, but the compiler can't tell that */ -static inline void ** __radix_tree_next_slot(void **slot, +static inline void __rcu **__radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { return slot; @@ -522,8 +520,8 @@ static inline void ** __radix_tree_next_slot(void **slot, * b) we are doing non-tagged iteration, and iter->index and iter->next_index * have been set up so that radix_tree_chunk_size() returns 1 or 0. */ -static __always_inline void ** -radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) +static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, + struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { iter->tags >>= 1; -- cgit v1.2.3 From 40137906c5f55c252194ef5834130383e639536f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Feb 2017 19:26:47 +0800 Subject: rhashtable: Add nested tables This patch adds code that handles GFP_ATOMIC kmalloc failure on insertion. As we cannot use vmalloc, we solve it by making our hash table nested. That is, we allocate single pages at each level and reach our desired table size by nesting them. When a nested table is created, only a single page is allocated at the top-level. Lower levels are allocated on demand during insertion. Therefore for each insertion to succeed, only two (non-consecutive) pages are needed. After a nested table is created, a rehash will be scheduled in order to switch to a vmalloced table as soon as possible. Also, the rehash code will never rehash into a nested table. If we detect a nested table during a rehash, the rehash will be aborted and a new rehash will be scheduled. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 78 +++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5c132d3188be..f2e12a845910 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,6 +61,7 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -68,10 +69,12 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing + * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; + unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -81,7 +84,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); + #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) +static inline struct rhash_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : + &tbl->buckets[hash]; +} + /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht); * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht); * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - const struct bucket_table *tbl; + struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -697,8 +727,12 @@ slow_path: } elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!pprev) + goto out; + + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -736,7 +770,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -746,7 +780,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; -- cgit v1.2.3 From fb585b44383c4cff85f92e67377ee1c5f07d6dc1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 10 Feb 2017 16:43:50 +0100 Subject: net: make net_device members garp_port and mrp_port conditional garp_port is only used in net/802/garp.c which is only compiled with CONFIG_GARP enabled. Same goes for mrp_port which is only used in net/802/mrp.c with CONFIG_MRP enabled. Only include the two members in struct net_device if their respective CONFIG_* is enabled. This saves a few bytes in struct net_device in case CONFIG_GARP or CONFIG_MRP are not enabled. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 54c82b5df0ba..98f65ed8f8b0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1865,8 +1865,12 @@ struct net_device { struct pcpu_vstats __percpu *vstats; }; +#if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; +#endif +#if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; +#endif struct device dev; const struct attribute_group *sysfs_groups[4]; -- cgit v1.2.3 From efff8e7879b8b7f8a077f495262f0bb9cfaa0b06 Mon Sep 17 00:00:00 2001 From: Uri Yanai Date: Tue, 7 Feb 2017 18:00:01 +0200 Subject: mmc: Adding AUTO_BKOPS_EN bit set for Auto BKOPS support Adding dedicated flag for AUTO_BKOPS in card->ext_csd structure. Read AUTO_BKOPS bit value from the device EXT_CSD and set to the card->ext_csd structure. In mmc_decode_ext_csd() add a print message in case the AUTO_BKOPS is enabled Signed-off-by: Uri Yanai Signed-off-by: Alex Lemberg Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + include/linux/mmc/mmc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 29d00c91c25c..77e61e0a216a 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -83,6 +83,7 @@ struct mmc_ext_csd { unsigned int hpi_cmd; /* cmd used as HPI */ bool bkops; /* background support bit */ bool man_bkops_en; /* manual bkops enable bit */ + bool auto_bkops_en; /* auto bkops enable bit */ unsigned int data_sector_size; /* 512 bytes or 4KB */ unsigned int data_tag_unit_size; /* DATA TAG UNIT size */ unsigned int boot_ro_lock; /* ro lock support */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 7406d9badda0..3ffc27aaeeaf 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -407,6 +407,7 @@ static inline bool mmc_op_multi(u32 opcode) * BKOPS modes */ #define EXT_CSD_MANUAL_BKOPS_MASK 0x01 +#define EXT_CSD_AUTO_BKOPS_MASK 0x02 /* * Command Queue -- cgit v1.2.3 From c43f1112c068f3b4b20a0a9d461c341d9caeb376 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 18 Jan 2017 14:10:33 +0200 Subject: IB/mlx5: Add additional checks before processing MADs Check the has_smi bit in vport context and class version of MADs before allowing MADs processing to take place. MAD_IFC SMI commands can be executed only if smi bit is set. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Parvi Kaustubhi Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3a309f6a4a15..b8d69aeb1784 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -289,6 +289,7 @@ struct mlx5_port_caps { int gid_table_len; int pkey_table_len; u8 ext_port_cap; + bool has_smi; }; struct mlx5_cmd_mailbox { -- cgit v1.2.3 From 853fe1bf7554155376bb3b231112cdff9ff79177 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 13 Feb 2017 16:25:26 -0800 Subject: cdrom: Make device operations read-only Since function tables are a common target for attackers, it's best to keep them in read-only memory. As such, this makes the CDROM device ops tables const. This drops additionally n_minors, since it isn't used meaningfully, and sets the only user of cdrom_dummy_generic_packet explicitly so the variables can all be const. Inspired by similar changes in grsecurity/PaX. Signed-off-by: Kees Cook Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/cdrom.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 8609d577bb66..6e8f209a6dff 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -36,7 +36,7 @@ struct packet_command /* Uniform cdrom data structures for cdrom.c */ struct cdrom_device_info { - struct cdrom_device_ops *ops; /* link to device_ops */ + const struct cdrom_device_ops *ops; /* link to device_ops */ struct list_head list; /* linked list of all device_info */ struct gendisk *disk; /* matching block layer disk */ void *handle; /* driver-dependent data */ @@ -87,7 +87,6 @@ struct cdrom_device_ops { /* driver specifications */ const int capability; /* capability flags */ - int n_minors; /* number of active minor devices */ /* handle uniform packets for scsi type devices (scsi,atapi) */ int (*generic_packet) (struct cdrom_device_info *, struct packet_command *); @@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi, int page_code, int page_control); extern void init_cdrom_command(struct packet_command *cgc, void *buffer, int len, int type); +extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, + struct packet_command *cgc); /* The SCSI spec says there could be 256 slots. */ #define CDROM_MAX_SLOTS 256 -- cgit v1.2.3 From 23a6964e3adb0796e1633562a574839b92360cb6 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 18 Jan 2017 15:25:10 +0200 Subject: IB/mlx5: Add port counter support for Receive WQs Counters weren't updated due to Receive WQs' traffic since the counter-id was not associated with the RQ. Added support for associating the q-counter-id with the Receive WQ. The attachment is done only when changing WQ's state from RESET to READY in modify-WQ command. FW support is required for the above, without this support Receive WQ counters will not count. Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 37327f6ba9cb..2d197d8a7025 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4937,7 +4937,7 @@ struct mlx5_ifc_modify_rq_out_bits { enum { MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, - MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3, }; struct mlx5_ifc_modify_rq_in_bits { -- cgit v1.2.3 From 49780d42dfc9ec0f4090c32ca59688449da1a1cd Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:10 +0200 Subject: IB/mlx5: Expose MR cache for mlx5_ib Allow other parts of mlx5_ib to use MR cache mechanism. * Add new functions mlx5_mr_cache_alloc and mlx5_mr_cache_free * Traditional MTT MKey buckets are limited by MAX_UMR_CACHE_ENTRY Additinal buckets may be added above. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index b8d69aeb1784..2534b8a0fd7b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1052,7 +1052,8 @@ enum { }; enum { - MAX_MR_CACHE_ENTRIES = 21, + MAX_UMR_CACHE_ENTRY = 20, + MAX_MR_CACHE_ENTRIES }; enum { -- cgit v1.2.3 From 81713d3788d2e6bc005f15ee1c59d0eb06050a6b Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 18 Jan 2017 16:58:11 +0200 Subject: IB/mlx5: Add implicit MR support Add implicit MR, covering entire user address space. The MR is implemented as an indirect KSM MR consisting of 1GB direct MRs. Pages and direct MRs are added/removed to MR by ODP. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2534b8a0fd7b..886ff2b00500 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1053,6 +1053,8 @@ enum { enum { MAX_UMR_CACHE_ENTRY = 20, + MLX5_IMR_MTT_CACHE_ENTRY, + MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES }; -- cgit v1.2.3 From 51c6ce2ae35980c755af33461c3138570ded615e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:18 -0700 Subject: vmbus: callback is in softirq not workqueue The callback is done via tasklet not workqueue. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e208e6437f5b..c9b6d533958f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -743,9 +742,7 @@ struct vmbus_channel { struct vmbus_close_msg close_msg; - /* Channel callback are invoked in this workqueue context */ - /* HANDLE dataWorkQueue; */ - + /* Channel callback's invoked in softirq context */ void (*onchannel_callback)(void *context); void *channel_callback_context; -- cgit v1.2.3 From 631e63a9f346cb657761ae22138f294718696501 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:20 -0700 Subject: vmbus: change to per channel tasklet Make the event handling tasklet per channel rather than per-cpu. This allows for better fairness when getting lots of data on the same cpu. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c9b6d533958f..69afc9337c0d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -35,7 +35,7 @@ #include #include #include - +#include #define MAX_PAGE_BUFFER_COUNT 32 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ @@ -743,6 +743,7 @@ struct vmbus_channel { struct vmbus_close_msg close_msg; /* Channel callback's invoked in softirq context */ + struct tasklet_struct callback_event; void (*onchannel_callback)(void *context); void *channel_callback_context; -- cgit v1.2.3 From b71e328297a3a578c482fb4814e737a0ec185839 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:21 -0700 Subject: vmbus: add direct isr callback mode Change the simple boolean batched_reading into a tri-value. For future NAPI support in netvsc driver, the callback needs to occur directly in interrupt handler. Batched mode is also changed to disable host interrupts immediately in interrupt routine (to avoid unnecessary host signals), and the tasklet is rescheduled if more data is detected. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 69afc9337c0d..e5aac5c051f7 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -748,19 +748,21 @@ struct vmbus_channel { void *channel_callback_context; /* - * A channel can be marked for efficient (batched) - * reading: - * If batched_reading is set to "true", we read until the - * channel is empty and hold off interrupts from the host - * during the entire read process. - * If batched_reading is set to "false", the client is not - * going to perform batched reading. - * - * By default we will enable batched reading; specific - * drivers that don't want this behavior can turn it off. + * A channel can be marked for one of three modes of reading: + * BATCHED - callback called from taslket and should read + * channel until empty. Interrupts from the host + * are masked while read is in process (default). + * DIRECT - callback called from tasklet (softirq). + * ISR - callback called in interrupt context and must + * invoke its own deferred processing. + * Host interrupts are disabled and must be re-enabled + * when ring is empty. */ - - bool batched_reading; + enum hv_callback_mode { + HV_CALL_BATCHED, + HV_CALL_DIRECT, + HV_CALL_ISR + } callback_mode; bool is_dedicated_interrupt; struct hv_input_signal_event_buffer sig_buf; @@ -910,9 +912,10 @@ static inline void set_channel_affinity_state(struct vmbus_channel *c, c->affinity_policy = policy; } -static inline void set_channel_read_state(struct vmbus_channel *c, bool state) +static inline void set_channel_read_mode(struct vmbus_channel *c, + enum hv_callback_mode mode) { - c->batched_reading = state; + c->callback_mode = mode; } static inline void set_per_channel_state(struct vmbus_channel *c, void *s) -- cgit v1.2.3 From 5529eaf6e79a61e0ca7ade257f31d2ababc7f6c9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:22 -0700 Subject: vmbus: remove conditional locking of vmbus_write All current usage of vmbus write uses the acquire_lock flag, therefore having it be optional is unnecessary. This also fixes a sparse warning since sparse doesn't like when a function has conditional locking. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index e5aac5c051f7..466374dbc98f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -845,16 +845,6 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; - /* - * On the channel send side, many of the VMBUS - * device drivers explicity serialize access to the - * outgoing ring buffer. Give more control to the - * VMBUS device drivers in terms how to serialize - * accesss to the outgoing ring buffer. - * The default behavior will be to aquire the - * ring lock to preserve the current behavior. - */ - bool acquire_ring_lock; /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput @@ -895,11 +885,6 @@ struct vmbus_channel { }; -static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) -{ - c->acquire_ring_lock = state; -} - static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & -- cgit v1.2.3 From 6e47dd3e2938f41d75045bbcb64aa9df3a463b2a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:23 -0700 Subject: vmbus: expose hv_begin/end_read In order to implement NAPI in netvsc, the driver needs access to control host interrupt mask. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 466374dbc98f..08eb71a22c14 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1512,6 +1512,36 @@ init_cached_read_index(struct vmbus_channel *channel) rbi->cached_read_index = rbi->ring_buffer->read_index; } +/* + * Mask off host interrupt callback notifications + */ +static inline void hv_begin_read(struct hv_ring_buffer_info *rbi) +{ + rbi->ring_buffer->interrupt_mask = 1; + + /* make sure mask update is not reordered */ + virt_mb(); +} + +/* + * Re-enable host callback and return number of outstanding bytes + */ +static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi) +{ + + rbi->ring_buffer->interrupt_mask = 0; + + /* make sure mask update is not reordered */ + virt_mb(); + + /* + * Now check to see if the ring buffer is still empty. + * If it is not, we raced and we need to process new + * incoming messages. + */ + return hv_get_bytes_to_read(rbi); +} + /* * An API to support in-place processing of incoming VMBUS packets. */ -- cgit v1.2.3 From e4165a0fad0963bf8b4a59f54d3360ccb6a6d1ea Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 11 Feb 2017 23:02:24 -0700 Subject: vmbus: constify parameters where possible Functions that just query state of ring buffer can have parameters marked const. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 08eb71a22c14..62bbf3c1aa4a 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -138,8 +138,8 @@ struct hv_ring_buffer_info { * for the specified ring buffer */ static inline void -hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, - u32 *read, u32 *write) +hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, + u32 *read, u32 *write) { u32 read_loc, write_loc, dsize; @@ -153,7 +153,7 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, *read = dsize - *write; } -static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi) +static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi) { u32 read_loc, write_loc, dsize, read; @@ -167,7 +167,7 @@ static inline u32 hv_get_bytes_to_read(struct hv_ring_buffer_info *rbi) return read; } -static inline u32 hv_get_bytes_to_write(struct hv_ring_buffer_info *rbi) +static inline u32 hv_get_bytes_to_write(const struct hv_ring_buffer_info *rbi) { u32 read_loc, write_loc, dsize, write; @@ -1448,9 +1448,9 @@ void vmbus_set_event(struct vmbus_channel *channel); /* Get the start of the ring buffer. */ static inline void * -hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) +hv_get_ring_buffer(const struct hv_ring_buffer_info *ring_info) { - return (void *)ring_info->ring_buffer->buffer; + return ring_info->ring_buffer->buffer; } /* -- cgit v1.2.3 From e225c20eb0fd0b6657e640408f11ee392dc82b5b Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Tue, 14 Feb 2017 17:29:36 -0700 Subject: Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN When CONFIG_KASAN is enabled, compilation fails: block/sed-opal.c: In function 'sed_ioctl': block/sed-opal.c:2447:1: error: the frame size of 2256 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] Moved all the ioctl structures off the stack and dynamically allocate using _IOC_SIZE() Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Reported-by: Arnd Bergmann Signed-off-by: Scott Bauer Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index af1a85eae193..205d520ea688 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -132,7 +132,7 @@ struct opal_dev { #ifdef CONFIG_BLK_SED_OPAL bool opal_unlock_from_suspend(struct opal_dev *dev); void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv); -int sed_ioctl(struct opal_dev *dev, unsigned int cmd, unsigned long ptr); +int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); static inline bool is_sed_ioctl(unsigned int cmd) { @@ -160,7 +160,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) } static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd, - unsigned long ptr) + void __user *ioctl_ptr) { return 0; } -- cgit v1.2.3 From 5f114163f2f5eb2edbb49c4d3e0b405c7a8a7e2a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 09:39:39 +0100 Subject: net: Add a skb_gro_flush_final helper. Add a skb_gro_flush_final helper to prepare for consuming skbs in call_gro_receive. We will extend this helper to not touch the skb if the skb is consumed by a gro callback with a followup patch. We need this to handle the upcomming IPsec ESP callbacks as they reinject the skb to the napi_gro_receive asynchronous. The handler is used in all gro_receive functions that can call the ESP gro handlers. Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 58afbd1cc659..f9da3acfee9a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2661,6 +2661,11 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, -- cgit v1.2.3 From 25393d3fc055b76587fcc91627aee8c345400c3a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 09:39:44 +0100 Subject: net: Prepare gro for packet consuming gro callbacks The upcomming IPsec ESP gro callbacks will consume the skb, so prepare for that. Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f9da3acfee9a..9e5d1cd9d975 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -352,6 +352,7 @@ enum gro_result { GRO_HELD, GRO_NORMAL, GRO_DROP, + GRO_CONSUMED, }; typedef enum gro_result gro_result_t; @@ -2661,10 +2662,18 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +#else static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } +#endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, -- cgit v1.2.3 From 884f38607897cb4a963ea8a65296f0973a2828d0 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 15 Feb 2017 16:35:29 +0800 Subject: mmc: core: move some sdio IDs out of quirks file Consolidate all the sdio devices' IDs into sdio_ids. Signed-off-by: Shawn Lin Signed-off-by: Ulf Hansson --- include/linux/mmc/sdio_ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index d43ef96bf075..46794a7a531c 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -51,6 +51,7 @@ #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 #define SDIO_DEVICE_ID_MARVELL_8688WLAN 0x9104 #define SDIO_DEVICE_ID_MARVELL_8688BT 0x9105 +#define SDIO_DEVICE_ID_MARVELL_8797_F0 0x9128 #define SDIO_VENDOR_ID_SIANO 0x039a #define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201 @@ -60,4 +61,10 @@ #define SDIO_DEVICE_ID_SIANO_NOVA_A0 0x1100 #define SDIO_DEVICE_ID_SIANO_STELLAR 0x5347 +#define SDIO_VENDOR_ID_TI 0x0097 +#define SDIO_DEVICE_ID_TI_WL1271 0x4076 + +#define SDIO_VENDOR_ID_STE 0x0020 +#define SDIO_DEVICE_ID_STE_CW1200 0x2280 + #endif /* LINUX_MMC_SDIO_IDS_H */ -- cgit v1.2.3 From 8a58a34ba479d42b3ef28f8ffd9e245a81a7786f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Feb 2017 16:41:15 +0100 Subject: timers: Make flags output in the timer_start tracepoint useful The timer flags in the timer_start trace event contain lots of useful information, but the meaning is not clear in the trace output. Making tools rely on the bit positions is bad as they might change over time. Decode the flags in the print out. Tools can retrieve the bits and their meaning from the trace format file. Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1702101639290.4036@nanos Requested-by: Arjan van de Ven Signed-off-by: Thomas Gleixner Signed-off-by: Steven Rostedt (VMware) --- include/linux/timer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 51d601f192d4..a17f915f9456 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -66,6 +66,8 @@ struct timer_list { #define TIMER_ARRAYSHIFT 22 #define TIMER_ARRAYMASK 0xFFC00000 +#define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) + #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ -- cgit v1.2.3 From 3821fd35b58dba449bd894014fbf4e1c43c9e951 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 3 Feb 2017 15:42:24 -0500 Subject: jump_label: Reduce the size of struct static_key The static_key->next field goes mostly unused. The field is used for associating module uses with a static key. Most uses of struct static_key define a static key in the core kernel and make use of it entirely within the core kernel, or define the static key in a module and make use of it only from within that module. In fact, of the ~3,000 static keys defined, I found only about 5 or so that did not fit this pattern. Thus, we can remove the static_key->next field entirely and overload the static_key->entries field. That is, when all the static_key uses are contained within the same module, static_key->entries continues to point to those uses. However, if the static_key uses are not contained within the module where the static_key is defined, then we allocate a struct static_key_mod, store a pointer to the uses within that struct static_key_mod, and have the static key point at the static_key_mod. This does incur some extra memory usage when a static_key is used in a module that does not define it, but since there are only a handful of such cases there is a net savings. In order to identify if the static_key->entries pointer contains a struct static_key_mod or a struct jump_entry pointer, bit 1 of static_key->entries is set to 1 if it points to a struct static_key_mod and is 0 if it points to a struct jump_entry. We were already using bit 0 in a similar way to store the initial value of the static_key. This does mean that allocations of struct static_key_mod and that the struct jump_entry tables need to be at least 4-byte aligned in memory. As far as I can tell all arches meet this criteria. For my .config, the patch increased the text by 778 bytes, but reduced the data + bss size by 14912, for a net savings of 14,134 bytes. text data bss dec hex filename 8092427 5016512 790528 13899467 d416cb vmlinux.pre 8093205 5001600 790528 13885333 d3df95 vmlinux.post Link: http://lkml.kernel.org/r/1486154544-4321-1-git-send-email-jbaron@akamai.com Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Joe Perches Signed-off-by: Jason Baron Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a0547c571800..680c98b2f41c 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -89,11 +89,17 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; -/* Set lsb bit to 1 if branch is default true, 0 ot */ - struct jump_entry *entries; -#ifdef CONFIG_MODULES - struct static_key_mod *next; -#endif +/* + * bit 0 => 1 if key is initially true + * 0 if initially false + * bit 1 => 1 if points to struct static_key_mod + * 0 if points to struct jump_entry + */ + union { + unsigned long type; + struct jump_entry *entries; + struct static_key_mod *next; + }; }; #else @@ -118,9 +124,10 @@ struct module; #ifdef HAVE_JUMP_LABEL -#define JUMP_TYPE_FALSE 0UL -#define JUMP_TYPE_TRUE 1UL -#define JUMP_TYPE_MASK 1UL +#define JUMP_TYPE_FALSE 0UL +#define JUMP_TYPE_TRUE 1UL +#define JUMP_TYPE_LINKED 2UL +#define JUMP_TYPE_MASK 3UL static __always_inline bool static_key_false(struct static_key *key) { -- cgit v1.2.3 From b85ad494098bf881c3713218fbd74193e5d5c488 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 3 Feb 2017 12:39:03 -0600 Subject: of: introduce of_graph_get_remote_node The OF graph API leaves too much of the graph walking to clients when in many cases the driver doesn't care about accessing the port or endpoint nodes. The drivers typically just want the device connected via a particular graph connection. of_graph_get_remote_node provides this functionality. Signed-off-by: Rob Herring Acked-by: Philipp Zabel --- include/linux/of_graph.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index bb3a5a2cd570..abdb02eaef06 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -51,6 +51,8 @@ struct device_node *of_graph_get_endpoint_by_regs( struct device_node *of_graph_get_remote_port_parent( const struct device_node *node); struct device_node *of_graph_get_remote_port(const struct device_node *node); +struct device_node *of_graph_get_remote_node(const struct device_node *node, + u32 port, u32 endpoint); #else static inline int of_graph_parse_endpoint(const struct device_node *node, @@ -89,6 +91,12 @@ static inline struct device_node *of_graph_get_remote_port( { return NULL; } +static inline struct device_node *of_graph_get_remote_node( + const struct device_node *node, + u32 port, u32 endpoint) +{ + return NULL; +} #endif /* CONFIG_OF */ -- cgit v1.2.3 From c78c70fa30e23dc6cdb394f6c13880919499fba5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 15 Feb 2017 10:24:10 +0200 Subject: qed: Add infrastructure for PTP support The patch adds the required qed interfaces for configuring/reading the PTP clock on the adapter. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 3613d63cd5d0..4cd1f0ccfa36 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -96,6 +96,7 @@ struct qed_update_vport_params { struct qed_start_vport_params { bool remove_inner_vlan; + bool handle_ptp_pkts; bool gro_enable; bool drop_ttl0; u8 vport_id; @@ -159,6 +160,15 @@ struct qed_eth_cb_ops { void (*force_mac) (void *dev, u8 *mac, bool forced); }; +#define QED_MAX_PHC_DRIFT_PPB 291666666 + +enum qed_ptp_filter_type { + QED_PTP_FILTER_L2, + QED_PTP_FILTER_IPV4, + QED_PTP_FILTER_IPV4_IPV6, + QED_PTP_FILTER_L2_IPV4_IPV6 +}; + #ifdef CONFIG_DCB /* Prototype declaration of qed_eth_dcbnl_ops should match with the declaration * of dcbnl_rtnl_ops structure. @@ -218,6 +228,17 @@ struct qed_eth_dcbnl_ops { }; #endif +struct qed_eth_ptp_ops { + int (*hwtstamp_tx_on)(struct qed_dev *); + int (*cfg_rx_filters)(struct qed_dev *, enum qed_ptp_filter_type); + int (*read_rx_ts)(struct qed_dev *, u64 *); + int (*read_tx_ts)(struct qed_dev *, u64 *); + int (*read_cc)(struct qed_dev *, u64 *); + int (*disable)(struct qed_dev *); + int (*adjfreq)(struct qed_dev *, s32); + int (*enable)(struct qed_dev *); +}; + struct qed_eth_ops { const struct qed_common_ops *common; #ifdef CONFIG_QED_SRIOV @@ -226,6 +247,7 @@ struct qed_eth_ops { #ifdef CONFIG_DCB const struct qed_eth_dcbnl_ops *dcb; #endif + const struct qed_eth_ptp_ops *ptp; int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); -- cgit v1.2.3 From c18a1e09008de7d8bd82b046d38a88f4285d53f9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 14 Feb 2017 23:28:59 +0800 Subject: block: introduce bio_clone_bioset_partial() md still need bio clone(not the fast version) for behind write, and it is more efficient to use bio_clone_bioset_partial(). The idea is simple and just copy the bvecs range specified from parameters. Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Ming Lei Signed-off-by: Shaohua Li --- include/linux/bio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7cf8a6c70a3f..8e521194f6fc 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) -static inline unsigned bio_segments(struct bio *bio) +static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) { unsigned segs = 0; struct bio_vec bv; @@ -205,12 +205,17 @@ static inline unsigned bio_segments(struct bio *bio) break; } - bio_for_each_segment(bv, bio, iter) + __bio_for_each_segment(bv, bio, iter, *bvec) segs++; return segs; } +static inline unsigned bio_segments(struct bio *bio) +{ + return __bio_segments(bio, &bio->bi_iter); +} + /* * get a reference to a bio, so it won't disappear. the intended use is * something like: @@ -384,6 +389,8 @@ extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); +extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t, + struct bio_set *, int, int); extern struct bio_set *fs_bio_set; -- cgit v1.2.3 From bf3f14d6342cfb37eab8f0cddd0e4d4063fd9fc9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 15 Feb 2017 22:29:51 -0500 Subject: rhashtable: Revert nested table changes. This reverts commits: 6a25478077d987edc5e2f880590a2bc5fcab4441 9dbbfb0ab6680c6a85609041011484e6658e7d3c 40137906c5f55c252194ef5834130383e639536f It's too risky to put in this late in the release cycle. We'll put these changes into the next merge window instead. Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 78 +++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,7 +61,6 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets - * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -69,12 +68,10 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing - * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; - unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -84,7 +81,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -377,12 +374,6 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); - #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -398,27 +389,6 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_head __rcu *const *rht_bucket( - const struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : - &tbl->buckets[hash]; -} - -static inline struct rhash_head __rcu **rht_bucket_var( - struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : - &tbl->buckets[hash]; -} - -static inline struct rhash_head __rcu **rht_bucket_insert( - struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) -{ - return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : - &tbl->buckets[hash]; -} - /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -438,7 +408,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -463,7 +433,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ tbl, hash, member) /** @@ -478,13 +448,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -515,7 +485,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -548,8 +518,8 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) /** @@ -595,7 +565,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - struct bucket_table *tbl; + const struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -727,12 +697,8 @@ slow_path: } elasticity = ht->elasticity; - pprev = rht_bucket_insert(ht, tbl, hash); - data = ERR_PTR(-ENOMEM); - if (!pprev) - goto out; - - rht_for_each_continue(head, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -770,7 +736,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(*pprev, tbl, hash); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -780,7 +746,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); + rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -989,8 +955,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1141,8 +1107,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; -- cgit v1.2.3 From b802c8410ca915e7772e738e68420115f1a3c811 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 15 Feb 2017 18:42:09 -0800 Subject: async_tx: deprecate broken support for channel switching Back in 2011, Russell pointed out that the "async_tx channel switch" capability was violating expectations of the dma mapping api [1]. At the time the existing uses were reviewed as still usable, but that longer term we needed a rework of the raid offload implementation. While some of the framework for a fixed implementation was introduced in 2012 [2], the wider rewrite never materialized. There continues to be interest in raid offload with new dma/raid engine drivers being submitted. Those drivers must not build on top of the broken channel switching capability. Prevent async_tx from using an offload engine if the channel switching capability is enabled. This still allows the engine to be used for other purposes, but the broken way async_tx uses these engines for raid will be disabled. For configurations where this causes a performance regression the only solution is to start the work of eliminating the async_tx api and moving channel management into the raid code directly where it can manage marshalling an operation stream between multiple dma channels. [1]: http://lists.infradead.org/pipermail/linux-arm-kernel/2011-January/036753.html [2]: https://lkml.org/lkml/2012/12/6/71 Cc: Anatolij Gustschin Cc: Anup Patel Cc: Rameshwar Prasad Sahu Cc: Saeed Bishara Cc: Thomas Petazzoni Reported-by: Russell King Signed-off-by: Dan Williams Signed-off-by: Vinod Koul --- include/linux/async_tx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 388574ea38ed..28e3cf1465ab 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -87,7 +87,7 @@ struct async_submit_ctl { void *scribble; }; -#ifdef CONFIG_DMA_ENGINE +#if defined(CONFIG_DMA_ENGINE) && !defined(CONFIG_ASYNC_TX_CHANNEL_SWITCH) #define async_tx_issue_pending_all dma_issue_pending_all /** -- cgit v1.2.3 From 2f44e29cef006a4b0a4ecf7d4c5aac7d0fbb505c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:53:12 +0100 Subject: genirq/msi: Add stubs for get_cached_msi_msg/pci_write_msi_msg A bug fix to the MSIx handling in vfio added references to functions that may not be defined if MSI is disabled in the kernel, resulting in this link error: drivers/built-in.o: In function `vfio_msi_set_vector_signal': :(.text+0x450808): undefined reference to `get_cached_msi_msg' :(.text+0x45080c): undefined reference to `write_msi_msg' As suggested by Alex Williamson, add stub implementations for get_cached_msi_msg() and pci_write_msi_msg(). In case this bugfix gets backported, please note that the #ifdef has changed over time, originally both functions were implemented in drivers/pci/msi.c and controlled by CONFIG_PCI_MSI, while nowadays get_cached_msi_msg() is part of the generic MSI support and can be used without PCI. Fixes: b8f02af096b1 ("vfio/pci: Restore MSIx message prior to enabling") Signed-off-by: Arnd Bergmann Cc: Marc Zyngier Cc: Alex Williamson Cc: Bjorn Helgaas Cc: Bart Van Assche Link: http://lkml.kernel.org/r/1413190208.4202.34.camel@ul30vt.home Link: http://lkml.kernel.org/r/20170214215343.3307861-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- include/linux/msi.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 0db320b7bb15..a83b84ff70e5 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -17,7 +17,13 @@ struct msi_desc; struct pci_dev; struct platform_msi_priv_data; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +#ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); +#else +static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) +{ +} +#endif typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, struct msi_msg *msg); @@ -116,11 +122,15 @@ struct msi_desc { struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); void *msi_desc_to_pci_sysdata(struct msi_desc *desc); +void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); #else /* CONFIG_PCI_MSI */ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) { return NULL; } +static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) +{ +} #endif /* CONFIG_PCI_MSI */ struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, @@ -128,7 +138,6 @@ struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, void free_msi_entry(struct msi_desc *entry); void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag); u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); -- cgit v1.2.3 From bbd6411513aa8ef3ea02abab61318daf87c1af1e Mon Sep 17 00:00:00 2001 From: "Cao, Lei" Date: Fri, 3 Feb 2017 20:04:35 +0000 Subject: KVM: Support vCPU-based gfn->hva cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide versions of struct gfn_to_hva_cache functions that take vcpu as a parameter instead of struct kvm. The existing functions are not needed anymore, so delete them. This allows dirty pages to be logged in the vcpu dirty ring, instead of the global dirty ring, for ring-based dirty memory tracking. Signed-off-by: Lei Cao Message-Id: Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cda457bcedc1..2db458ee94b0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -641,18 +641,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); +int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); -int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); -int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len); +int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); +int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); +int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From 55f0cd3fb9c29c20fb94c47e28a9ec8cf704f8c2 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 16 Feb 2017 13:07:37 -0700 Subject: spi: spi-ep93xx: simplify GPIO chip selects This driver requires a GPIO line to be used for the chip select of each SPI device. Remove the ep93xx_spi_chip_ops definition from the platform data and use the spi core GPIO handling for the chip selects. Fix all the ep93xx platforms that use this driver and remove the old Documentation. Signed-off-by: H Hartley Sweeten Signed-off-by: Mark Brown --- include/linux/platform_data/spi-ep93xx.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h index 9bb63ac13f04..171a271c2cbd 100644 --- a/include/linux/platform_data/spi-ep93xx.h +++ b/include/linux/platform_data/spi-ep93xx.h @@ -5,25 +5,14 @@ struct spi_device; /** * struct ep93xx_spi_info - EP93xx specific SPI descriptor - * @num_chipselect: number of chip selects on this board, must be - * at least one + * @chipselect: array of gpio numbers to use as chip selects + * @num_chipselect: ARRAY_SIZE(chipselect) * @use_dma: use DMA for the transfers */ struct ep93xx_spi_info { + int *chipselect; int num_chipselect; bool use_dma; }; -/** - * struct ep93xx_spi_chip_ops - operation callbacks for SPI slave device - * @setup: setup the chip select mechanism - * @cleanup: cleanup the chip select mechanism - * @cs_control: control the device chip select - */ -struct ep93xx_spi_chip_ops { - int (*setup)(struct spi_device *spi); - void (*cleanup)(struct spi_device *spi); - void (*cs_control)(struct spi_device *spi, int value); -}; - #endif /* __ASM_MACH_EP93XX_SPI_H */ -- cgit v1.2.3 From bd7e5b0899a429445cc6e3037c13f8b5ae3be903 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Feb 2017 21:18:52 -0800 Subject: KVM: x86: remove code for lazy FPU handling The FPU is always active now when running KVM. Reviewed-by: David Matlack Reviewed-by: Bandan Das Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2db458ee94b0..8d69d5150748 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -221,7 +221,6 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; struct swait_queue_head wq; struct pid *pid; -- cgit v1.2.3 From da20420f83ea0fbcf3d03afda08d971ea1d8a356 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Feb 2017 19:26:47 +0800 Subject: rhashtable: Add nested tables This patch adds code that handles GFP_ATOMIC kmalloc failure on insertion. As we cannot use vmalloc, we solve it by making our hash table nested. That is, we allocate single pages at each level and reach our desired table size by nesting them. When a nested table is created, only a single page is allocated at the top-level. Lower levels are allocated on demand during insertion. Therefore for each insertion to succeed, only two (non-consecutive) pages are needed. After a nested table is created, a rehash will be scheduled in order to switch to a vmalloced table as soon as possible. Also, the rehash code will never rehash into a nested table. If we detect a nested table during a rehash, the rehash will be aborted and a new rehash will be scheduled. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 78 +++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5c132d3188be..f2e12a845910 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,6 +61,7 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -68,10 +69,12 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing + * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; + unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -81,7 +84,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); + #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) +static inline struct rhash_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : + &tbl->buckets[hash]; +} + /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht); * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht); * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - const struct bucket_table *tbl; + struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -697,8 +727,12 @@ slow_path: } elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!pprev) + goto out; + + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -736,7 +770,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -746,7 +780,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; -- cgit v1.2.3 From 9383191da4e40360a5d880fbe6bb03911c61621b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:49 +0100 Subject: bpf: remove stubs for cBPF from arch code Remove the dummy bpf_jit_compile() stubs for eBPF JITs and make that a single __weak function in the core that can be overridden similarly to the eBPF one. Also remove stale pr_err() mentions of bpf_jit_compile. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index e4eb2546339a..c7a70e0cc3a0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,6 +607,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, @@ -625,7 +626,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); -void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); @@ -669,10 +669,6 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } #else -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); -- cgit v1.2.3 From 74451e66d516c55e309e8d89a4a1e7596e46aacd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:50 +0100 Subject: bpf: make jited programs visible in traces Long standing issue with JITed programs is that stack traces from function tracing check whether a given address is kernel code through {__,}kernel_text_address(), which checks for code in core kernel, modules and dynamically allocated ftrace trampolines. But what is still missing is BPF JITed programs (interpreted programs are not an issue as __bpf_prog_run() will be attributed to them), thus when a stack trace is triggered, the code walking the stack won't see any of the JITed ones. The same for address correlation done from user space via reading /proc/kallsyms. This is read by tools like perf, but the latter is also useful for permanent live tracing with eBPF itself in combination with stack maps when other eBPF types are part of the callchain. See offwaketime example on dumping stack from a map. This work tries to tackle that issue by making the addresses and symbols known to the kernel. The lookup from *kernel_text_address() is implemented through a latched RB tree that can be read under RCU in fast-path that is also shared for symbol/size/offset lookup for a specific given address in kallsyms. The slow-path iteration through all symbols in the seq file done via RCU list, which holds a tiny fraction of all exported ksyms, usually below 0.1 percent. Function symbols are exported as bpf_prog_, in order to aide debugging and attribution. This facility is currently enabled for root-only when bpf_jit_kallsyms is set to 1, and disabled if hardening is active in any mode. The rationale behind this is that still a lot of systems ship with world read permissions on kallsyms thus addresses should not get suddenly exposed for them. If that situation gets much better in future, we always have the option to change the default on this. Likewise, unprivileged programs are not allowed to add entries there either, but that is less of a concern as most such programs types relevant in this context are for root-only anyway. If enabled, call graphs and stack traces will then show a correct attribution; one example is illustrated below, where the trace is now visible in tooling such as perf script --kallsyms=/proc/kallsyms and friends. Before: 7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so) After: 7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux) [...] 7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++ include/linux/filter.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 57d60dc5b600..909fc033173a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -8,10 +8,12 @@ #define _LINUX_BPF_H 1 #include + #include #include #include #include +#include struct perf_event; struct bpf_map; @@ -177,6 +179,8 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + struct latch_tree_node ksym_tnode; + struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/filter.h b/include/linux/filter.h index c7a70e0cc3a0..0c1cc9143cb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -54,6 +54,12 @@ struct bpf_prog_aux; #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +/* As per nm, we expose JITed images as text (code) section for + * kallsyms. That way, tools like perf can find it to match + * addresses. + */ +#define BPF_SYM_ELF_TYPE 't' + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { set_memory_rw((unsigned long)fp, fp->pages); } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ + set_memory_rw((unsigned long)hdr, hdr->pages); +} #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { @@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ +} #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr = real_start & PAGE_MASK; + + return (void *)addr; +} + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { @@ -617,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; +extern int bpf_jit_kallsyms; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); @@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return fp->jited && bpf_jit_is_ebpf(); +} + static inline bool bpf_jit_blinding_enabled(void) { /* These are the prerequisites, should someone ever have the @@ -668,11 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } -#else + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + /* There are a couple of corner cases where kallsyms should + * not be enabled f.e. on hardening. + */ + if (bpf_jit_harden) + return false; + if (!bpf_jit_kallsyms) + return false; + if (bpf_jit_kallsyms == 1) + return true; + + return false; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); +bool is_bpf_text_address(unsigned long addr); +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym); + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + const char *ret = __bpf_address_lookup(addr, size, off, sym); + + if (ret && modname) + *modname = NULL; + return ret; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp); +void bpf_prog_kallsyms_del(struct bpf_prog *fp); + +#else /* CONFIG_BPF_JIT */ + +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return false; +} + static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + return false; +} + +static inline const char * +__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + return NULL; +} + +static inline bool is_bpf_text_address(unsigned long addr) +{ + return false; +} + +static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *sym) +{ + return -ERANGE; +} + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} + +static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ +} #endif /* CONFIG_BPF_JIT */ #define BPF_ANC BIT(15) -- cgit v1.2.3 From 4f1244c8298606b8fae64b4d78b820ae6b896e3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Feb 2017 13:59:39 +0100 Subject: block/sed-opal: allocate struct opal_dev dynamically Insted of bloating the containing structure with it all the time this allocates struct opal_dev dynamically. Additionally this allows moving the definition of struct opal_dev into sed-opal.c. For this a new private data field is added to it that is passed to the send/receive callback. After that a lot of internals can be made private as well. Signed-off-by: Christoph Hellwig Tested-by: Scott Bauer Reviewed-by: Scott Bauer Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 116 ++--------------------------------------------- 1 file changed, 4 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 205d520ea688..deee23d012e7 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -21,117 +21,14 @@ #include #include -/* - * These constant values come from: - * SPC-4 section - * 6.30 SECURITY PROTOCOL IN command / table 265. - */ -enum { - TCG_SECP_00 = 0, - TCG_SECP_01, -}; struct opal_dev; -#define IO_BUFFER_LENGTH 2048 -#define MAX_TOKS 64 - -typedef int (*opal_step)(struct opal_dev *dev); -typedef int (sec_send_recv)(struct opal_dev *ctx, u16 spsp, u8 secp, - void *buffer, size_t len, bool send); - - -enum opal_atom_width { - OPAL_WIDTH_TINY, - OPAL_WIDTH_SHORT, - OPAL_WIDTH_MEDIUM, - OPAL_WIDTH_LONG, - OPAL_WIDTH_TOKEN -}; - -/* - * Token defs derived from: - * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 - * 3.2.2 Data Stream Encoding - */ -enum opal_response_token { - OPAL_DTA_TOKENID_BYTESTRING = 0xe0, - OPAL_DTA_TOKENID_SINT = 0xe1, - OPAL_DTA_TOKENID_UINT = 0xe2, - OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */ - OPAL_DTA_TOKENID_INVALID = 0X0 -}; - -/* - * On the parsed response, we don't store again the toks that are already - * stored in the response buffer. Instead, for each token, we just store a - * pointer to the position in the buffer where the token starts, and the size - * of the token in bytes. - */ -struct opal_resp_tok { - const u8 *pos; - size_t len; - enum opal_response_token type; - enum opal_atom_width width; - union { - u64 u; - s64 s; - } stored; -}; - -/* - * From the response header it's not possible to know how many tokens there are - * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later - * if we start dealing with messages that have more than that, we can increase - * this number. This is done to avoid having to make two passes through the - * response, the first one counting how many tokens we have and the second one - * actually storing the positions. - */ -struct parsed_resp { - int num; - struct opal_resp_tok toks[MAX_TOKS]; -}; - -/** - * struct opal_dev - The structure representing a OPAL enabled SED. - * @supported: Whether or not OPAL is supported on this controller. - * @send_recv: The combined sec_send/sec_recv function pointer. - * @opal_step: A series of opal methods that are necessary to complete a command. - * @func_data: An array of parameters for the opal methods above. - * @state: Describes the current opal_step we're working on. - * @dev_lock: Locks the entire opal_dev structure. - * @parsed: Parsed response from controller. - * @prev_data: Data returned from a method to the controller. - * @unlk_lst: A list of Locking ranges to unlock on this device during a resume. - */ -struct opal_dev { - bool initialized; - bool supported; - sec_send_recv *send_recv; - - const opal_step *funcs; - void **func_data; - int state; - struct mutex dev_lock; - u16 comid; - u32 hsn; - u32 tsn; - u64 align; - u64 lowest_lba; - - size_t pos; - u8 cmd[IO_BUFFER_LENGTH]; - u8 resp[IO_BUFFER_LENGTH]; - - struct parsed_resp parsed; - size_t prev_d_len; - void *prev_data; - - struct list_head unlk_lst; -}; +typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer, + size_t len, bool send); #ifdef CONFIG_BLK_SED_OPAL bool opal_unlock_from_suspend(struct opal_dev *dev); -void init_opal_dev(struct opal_dev *opal_dev, sec_send_recv *send_recv); +struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); static inline bool is_sed_ioctl(unsigned int cmd) @@ -168,11 +65,6 @@ static inline bool opal_unlock_from_suspend(struct opal_dev *dev) { return false; } -static inline void init_opal_dev(struct opal_dev *opal_dev, - sec_send_recv *send_recv) -{ - opal_dev->supported = false; - opal_dev->initialized = true; -} +#define init_opal_dev(data, send_recv) NULL #endif /* CONFIG_BLK_SED_OPAL */ #endif /* LINUX_OPAL_H */ -- cgit v1.2.3 From 8a9ae523282f324989850fcf41312b42a2fb9296 Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Fri, 17 Feb 2017 13:59:40 +0100 Subject: nvme: Check for Security send/recv support before issuing commands. We need to verify that the controller supports the security commands before actually trying to issue them. Signed-off-by: Scott Bauer [hch: moved the check so that we don't call into the OPAL code if not supported] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d1c6f1b15c9..00eac863a9c7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -244,6 +244,7 @@ enum { NVME_CTRL_ONCS_DSM = 1 << 2, NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, + NVME_CTRL_OACS_SEC_SUPP = 1 << 0, }; struct nvme_lbaf { -- cgit v1.2.3 From 1e128c81290a419ab9ec8b09fe989f1c6c15a0f4 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Wed, 15 Feb 2017 06:28:22 -0800 Subject: qed: Add support for hardware offloaded FCoE. This adds the backbone required for the various HW initalizations which are necessary for the FCoE driver (qedf) for QLogic FastLinQ 4xxxx line of adapters - FW notification, resource initializations, etc. Signed-off-by: Arun Easi Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 10 +- include/linux/qed/fcoe_common.h | 715 ++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_fcoe_if.h | 145 ++++++++ include/linux/qed/qed_if.h | 41 ++- 4 files changed, 908 insertions(+), 3 deletions(-) create mode 100644 include/linux/qed/fcoe_common.h create mode 100644 include/linux/qed/qed_fcoe_if.h (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index c33080baf38c..52966b9bfde3 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -62,6 +62,7 @@ #define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE 64 #define ISCSI_CDU_TASK_SEG_TYPE 0 +#define FCOE_CDU_TASK_SEG_TYPE 0 #define RDMA_CDU_TASK_SEG_TYPE 1 #define FW_ASSERT_GENERAL_ATTN_IDX 32 @@ -205,6 +206,9 @@ #define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 #define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_FCOE_SQ_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_FCOE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_FCOE_X_FERQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD5 #define DQ_XCM_ISCSI_SQ_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 #define DQ_XCM_ISCSI_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 @@ -261,6 +265,7 @@ #define DQ_XCM_ETH_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) #define DQ_XCM_ETH_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) #define DQ_XCM_ETH_TPH_EN_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_FCOE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) #define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) #define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) #define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) @@ -291,6 +296,9 @@ #define DQ_TCM_AGG_FLG_SHIFT_CF6 6 #define DQ_TCM_AGG_FLG_SHIFT_CF7 7 /* TCM agg counter flag selection (FW) */ +#define DQ_TCM_FCOE_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_FCOE_DUMMY_TIMER_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF2) +#define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) #define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) #define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) @@ -728,7 +736,7 @@ enum mf_mode { /* Per-protocol connection types */ enum protocol_type { PROTOCOLID_ISCSI, - PROTOCOLID_RESERVED2, + PROTOCOLID_FCOE, PROTOCOLID_ROCE, PROTOCOLID_CORE, PROTOCOLID_ETH, diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h new file mode 100644 index 000000000000..2e417a45c5f7 --- /dev/null +++ b/include/linux/qed/fcoe_common.h @@ -0,0 +1,715 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef __FCOE_COMMON__ +#define __FCOE_COMMON__ +/*********************/ +/* FCOE FW CONSTANTS */ +/*********************/ + +#define FC_ABTS_REPLY_MAX_PAYLOAD_LEN 12 +#define FCOE_MAX_SIZE_FCP_DATA_SUPER (8600) + +struct fcoe_abts_pkt { + __le32 abts_rsp_fc_payload_lo; + __le16 abts_rsp_rx_id; + u8 abts_rsp_rctl; + u8 reserved2; +}; + +/* FCoE additional WQE (Sq/XferQ) information */ +union fcoe_additional_info_union { + __le32 previous_tid; + __le32 parent_tid; + __le32 burst_length; + __le32 seq_rec_updated_offset; +}; + +struct fcoe_exp_ro { + __le32 data_offset; + __le32 reserved; +}; + +union fcoe_cleanup_addr_exp_ro_union { + struct regpair abts_rsp_fc_payload_hi; + struct fcoe_exp_ro exp_ro; +}; + +/* FCoE Ramrod Command IDs */ +enum fcoe_completion_status { + FCOE_COMPLETION_STATUS_SUCCESS, + FCOE_COMPLETION_STATUS_FCOE_VER_ERR, + FCOE_COMPLETION_STATUS_SRC_MAC_ADD_ARR_ERR, + MAX_FCOE_COMPLETION_STATUS +}; + +struct fc_addr_nw { + u8 addr_lo; + u8 addr_mid; + u8 addr_hi; +}; + +/* FCoE connection offload */ +struct fcoe_conn_offload_ramrod_data { + struct regpair sq_pbl_addr; + struct regpair sq_curr_page_addr; + struct regpair sq_next_page_addr; + struct regpair xferq_pbl_addr; + struct regpair xferq_curr_page_addr; + struct regpair xferq_next_page_addr; + struct regpair respq_pbl_addr; + struct regpair respq_curr_page_addr; + struct regpair respq_next_page_addr; + __le16 dst_mac_addr_lo; + __le16 dst_mac_addr_mid; + __le16 dst_mac_addr_hi; + __le16 src_mac_addr_lo; + __le16 src_mac_addr_mid; + __le16 src_mac_addr_hi; + __le16 tx_max_fc_pay_len; + __le16 e_d_tov_timer_val; + __le16 rx_max_fc_pay_len; + __le16 vlan_tag; +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_MASK 0xFFF +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_SHIFT 0 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_CFI_MASK 0x1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_CFI_SHIFT 12 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_MASK 0x7 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_SHIFT 13 + __le16 physical_q0; + __le16 rec_rr_tov_timer_val; + struct fc_addr_nw s_id; + u8 max_conc_seqs_c3; + struct fc_addr_nw d_id; + u8 flags; +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONT_INCR_SEQ_CNT_MASK 0x1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONT_INCR_SEQ_CNT_SHIFT 0 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_MASK 0x1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_SHIFT 1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_MASK 0x1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_SHIFT 2 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_MASK 0x1 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_SHIFT 3 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_MODE_MASK 0x3 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_MODE_SHIFT 4 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_RESERVED0_MASK 0x3 +#define FCOE_CONN_OFFLOAD_RAMROD_DATA_RESERVED0_SHIFT 6 + __le16 conn_id; + u8 def_q_idx; + u8 reserved[5]; +}; + +/* FCoE terminate connection request */ +struct fcoe_conn_terminate_ramrod_data { + struct regpair terminate_params_addr; +}; + +struct fcoe_fast_sgl_ctx { + struct regpair sgl_start_addr; + __le32 sgl_byte_offset; + __le16 task_reuse_cnt; + __le16 init_offset_in_first_sge; +}; + +struct fcoe_slow_sgl_ctx { + struct regpair base_sgl_addr; + __le16 curr_sge_off; + __le16 remainder_num_sges; + __le16 curr_sgl_index; + __le16 reserved; +}; + +struct fcoe_sge { + struct regpair sge_addr; + __le16 size; + __le16 reserved0; + u8 reserved1[3]; + u8 is_valid_sge; +}; + +union fcoe_data_desc_ctx { + struct fcoe_fast_sgl_ctx fast; + struct fcoe_slow_sgl_ctx slow; + struct fcoe_sge single_sge; +}; + +union fcoe_dix_desc_ctx { + struct fcoe_slow_sgl_ctx dix_sgl; + struct fcoe_sge cached_dix_sge; +}; + +struct fcoe_fcp_cmd_payload { + __le32 opaque[8]; +}; + +struct fcoe_fcp_rsp_payload { + __le32 opaque[6]; +}; + +struct fcoe_fcp_xfer_payload { + __le32 opaque[3]; +}; + +/* FCoE firmware function init */ +struct fcoe_init_func_ramrod_data { + struct scsi_init_func_params func_params; + struct scsi_init_func_queues q_params; + __le16 mtu; + __le16 sq_num_pages_in_pbl; + __le32 reserved; +}; + +/* FCoE: Mode of the connection: Target or Initiator or both */ +enum fcoe_mode_type { + FCOE_INITIATOR_MODE = 0x0, + FCOE_TARGET_MODE = 0x1, + FCOE_BOTH_OR_NOT_CHOSEN = 0x3, + MAX_FCOE_MODE_TYPE +}; + +struct fcoe_mstorm_fcoe_task_st_ctx_fp { + __le16 flags; +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_MASK 0x7FFF +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_SHIFT 0 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_SHIFT 15 + __le16 difDataResidue; + __le16 parent_id; + __le16 single_sge_saved_offset; + __le32 data_2_trns_rem; + __le32 offset_in_io; + union fcoe_dix_desc_ctx dix_desc; + union fcoe_data_desc_ctx data_desc; +}; + +struct fcoe_mstorm_fcoe_task_st_ctx_non_fp { + __le16 flags; +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_MASK 0x3 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_SHIFT 0 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_SHIFT 2 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_SHIFT 3 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_MASK 0xF +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_SHIFT 4 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_MASK 0x3 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_SHIFT 8 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_SHIFT 10 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_SHIFT 11 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_SHIFT 12 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_SHIFT 13 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_SHIFT 14 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_MASK 0x1 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_SHIFT 15 + u8 tx_rx_sgl_mode; +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_MASK 0x7 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_SHIFT 0 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_MASK 0x7 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_SHIFT 3 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_MASK 0x3 +#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_SHIFT 6 + u8 rsrv2; + __le32 num_prm_zero_read; + struct regpair rsp_buf_addr; +}; + +struct fcoe_rx_stat { + struct regpair fcoe_rx_byte_cnt; + struct regpair fcoe_rx_data_pkt_cnt; + struct regpair fcoe_rx_xfer_pkt_cnt; + struct regpair fcoe_rx_other_pkt_cnt; + __le32 fcoe_silent_drop_pkt_cmdq_full_cnt; + __le32 fcoe_silent_drop_pkt_rq_full_cnt; + __le32 fcoe_silent_drop_pkt_crc_error_cnt; + __le32 fcoe_silent_drop_pkt_task_invalid_cnt; + __le32 fcoe_silent_drop_total_pkt_cnt; + __le32 rsrv; +}; + +enum fcoe_sgl_mode { + FCOE_SLOW_SGL, + FCOE_SINGLE_FAST_SGE, + FCOE_2_FAST_SGE, + FCOE_3_FAST_SGE, + FCOE_4_FAST_SGE, + FCOE_MUL_FAST_SGES, + MAX_FCOE_SGL_MODE +}; + +struct fcoe_stat_ramrod_data { + struct regpair stat_params_addr; +}; + +struct protection_info_ctx { + __le16 flags; +#define PROTECTION_INFO_CTX_HOST_INTERFACE_MASK 0x3 +#define PROTECTION_INFO_CTX_HOST_INTERFACE_SHIFT 0 +#define PROTECTION_INFO_CTX_DIF_TO_PEER_MASK 0x1 +#define PROTECTION_INFO_CTX_DIF_TO_PEER_SHIFT 2 +#define PROTECTION_INFO_CTX_VALIDATE_DIX_APP_TAG_MASK 0x1 +#define PROTECTION_INFO_CTX_VALIDATE_DIX_APP_TAG_SHIFT 3 +#define PROTECTION_INFO_CTX_INTERVAL_SIZE_LOG_MASK 0xF +#define PROTECTION_INFO_CTX_INTERVAL_SIZE_LOG_SHIFT 4 +#define PROTECTION_INFO_CTX_VALIDATE_DIX_REF_TAG_MASK 0x1 +#define PROTECTION_INFO_CTX_VALIDATE_DIX_REF_TAG_SHIFT 8 +#define PROTECTION_INFO_CTX_RESERVED0_MASK 0x7F +#define PROTECTION_INFO_CTX_RESERVED0_SHIFT 9 + u8 dix_block_size; + u8 dst_size; +}; + +union protection_info_union_ctx { + struct protection_info_ctx info; + __le32 value; +}; + +struct fcp_rsp_payload_padded { + struct fcoe_fcp_rsp_payload rsp_payload; + __le32 reserved[2]; +}; + +struct fcp_xfer_payload_padded { + struct fcoe_fcp_xfer_payload xfer_payload; + __le32 reserved[5]; +}; + +struct fcoe_tx_data_params { + __le32 data_offset; + __le32 offset_in_io; + u8 flags; +#define FCOE_TX_DATA_PARAMS_OFFSET_IN_IO_VALID_MASK 0x1 +#define FCOE_TX_DATA_PARAMS_OFFSET_IN_IO_VALID_SHIFT 0 +#define FCOE_TX_DATA_PARAMS_DROP_DATA_MASK 0x1 +#define FCOE_TX_DATA_PARAMS_DROP_DATA_SHIFT 1 +#define FCOE_TX_DATA_PARAMS_AFTER_SEQ_REC_MASK 0x1 +#define FCOE_TX_DATA_PARAMS_AFTER_SEQ_REC_SHIFT 2 +#define FCOE_TX_DATA_PARAMS_RESERVED0_MASK 0x1F +#define FCOE_TX_DATA_PARAMS_RESERVED0_SHIFT 3 + u8 dif_residual; + __le16 seq_cnt; + __le16 single_sge_saved_offset; + __le16 next_dif_offset; + __le16 seq_id; + __le16 reserved3; +}; + +struct fcoe_tx_mid_path_params { + __le32 parameter; + u8 r_ctl; + u8 type; + u8 cs_ctl; + u8 df_ctl; + __le16 rx_id; + __le16 ox_id; +}; + +struct fcoe_tx_params { + struct fcoe_tx_data_params data; + struct fcoe_tx_mid_path_params mid_path; +}; + +union fcoe_tx_info_union_ctx { + struct fcoe_fcp_cmd_payload fcp_cmd_payload; + struct fcp_rsp_payload_padded fcp_rsp_payload; + struct fcp_xfer_payload_padded fcp_xfer_payload; + struct fcoe_tx_params tx_params; +}; + +struct ystorm_fcoe_task_st_ctx { + u8 task_type; + u8 sgl_mode; +#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x7 +#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 0 +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK 0x1F +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT 3 + u8 cached_dix_sge; + u8 expect_first_xfer; + __le32 num_pbf_zero_write; + union protection_info_union_ctx protection_info_union; + __le32 data_2_trns_rem; + union fcoe_tx_info_union_ctx tx_info_union; + union fcoe_dix_desc_ctx dix_desc; + union fcoe_data_desc_ctx data_desc; + __le16 ox_id; + __le16 rx_id; + __le32 task_rety_identifier; + __le32 reserved1[2]; +}; + +struct ystorm_fcoe_task_ag_ctx { + u8 byte0; + u8 byte1; + __le16 word0; + u8 flags0; +#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK 0xF +#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define YSTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 +#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK 0x3 +#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT 0 +#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 6 +#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 byte2; + __le32 reg0; + u8 byte3; + u8 byte4; + __le16 rx_id; + __le16 word2; + __le16 word3; + __le16 word4; + __le16 word5; + __le32 reg1; + __le32 reg2; +}; + +struct tstorm_fcoe_task_ag_ctx { + u8 reserved; + u8 byte1; + __le16 icid; + u8 flags0; +#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_VALID_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT 7 + u8 flags1; +#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT 1 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 6 + u8 flags2; +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT 6 + u8 flags3; +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK 0x3 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT 3 +#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT 7 + u8 flags4; +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT 0 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT 1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 2 +#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 3 +#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 4 +#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 5 +#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 6 +#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 7 + u8 cleanup_state; + __le16 last_sent_tid; + __le32 rec_rr_tov_exp_timeout; + u8 byte3; + u8 byte4; + __le16 word2; + __le16 word3; + __le16 word4; + __le32 data_offset_end_of_seq; + __le32 data_offset_next; +}; + +struct fcoe_tstorm_fcoe_task_st_ctx_read_write { + union fcoe_cleanup_addr_exp_ro_union cleanup_addr_exp_ro_union; + __le16 flags; +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK 0x7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_SHIFT 0 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_MASK 0x1 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT 3 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_MASK 0x1 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT 4 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_MASK 0x1 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT 5 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_MASK 0x1 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 6 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_MASK 0x1 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT 7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_MASK 0x3 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT 8 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK 0x3F +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT 10 + __le16 seq_cnt; + u8 seq_id; + u8 ooo_rx_seq_id; + __le16 rx_id; + struct fcoe_abts_pkt abts_data; + __le32 e_d_tov_exp_timeout_val; + __le16 ooo_rx_seq_cnt; + __le16 reserved1; +}; + +struct fcoe_tstorm_fcoe_task_st_ctx_read_only { + u8 task_type; + u8 dev_type; + u8 conf_supported; + u8 glbl_q_num; + __le32 cid; + __le32 fcp_cmd_trns_size; + __le32 rsrv; +}; + +struct tstorm_fcoe_task_st_ctx { + struct fcoe_tstorm_fcoe_task_st_ctx_read_write read_write; + struct fcoe_tstorm_fcoe_task_st_ctx_read_only read_only; +}; + +struct mstorm_fcoe_task_ag_ctx { + u8 byte0; + u8 byte1; + __le16 icid; + u8 flags0; +#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT 5 +#define MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT 7 + u8 flags1; +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 2 +#define MSTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 7 + u8 flags2; +#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 0 +#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 2 +#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 3 +#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 4 +#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 5 +#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT 6 +#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 7 + u8 cleanup_state; + __le32 received_bytes; + u8 byte3; + u8 glbl_q_num; + __le16 word1; + __le16 tid_to_xfer; + __le16 word3; + __le16 word4; + __le16 word5; + __le32 expected_bytes; + __le32 reg2; +}; + +struct mstorm_fcoe_task_st_ctx { + struct fcoe_mstorm_fcoe_task_st_ctx_non_fp non_fp; + struct fcoe_mstorm_fcoe_task_st_ctx_fp fp; +}; + +struct ustorm_fcoe_task_ag_ctx { + u8 reserved; + u8 byte1; + __le16 icid; + u8 flags0; +#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK 0xF +#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_BIT1_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT 5 +#define USTORM_FCOE_TASK_AG_CTX_CF0_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT 6 + u8 flags1; +#define USTORM_FCOE_TASK_AG_CTX_CF1_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_CF2_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_CF3_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK 0x3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT 6 + u8 flags2; +#define USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT 1 +#define USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT 3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT 4 +#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT 5 +#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT 6 +#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT 7 + u8 flags3; +#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT 0 +#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT 1 +#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT 2 +#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK 0x1 +#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT 3 +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF +#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4 + __le32 dif_err_intervals; + __le32 dif_error_1st_interval; + __le32 global_cq_num; + __le32 reg3; + __le32 reg4; + __le32 reg5; +}; + +struct fcoe_task_context { + struct ystorm_fcoe_task_st_ctx ystorm_st_context; + struct tdif_task_context tdif_context; + struct ystorm_fcoe_task_ag_ctx ystorm_ag_context; + struct tstorm_fcoe_task_ag_ctx tstorm_ag_context; + struct timers_context timer_context; + struct tstorm_fcoe_task_st_ctx tstorm_st_context; + struct regpair tstorm_st_padding[2]; + struct mstorm_fcoe_task_ag_ctx mstorm_ag_context; + struct mstorm_fcoe_task_st_ctx mstorm_st_context; + struct ustorm_fcoe_task_ag_ctx ustorm_ag_context; + struct rdif_task_context rdif_context; +}; + +struct fcoe_tx_stat { + struct regpair fcoe_tx_byte_cnt; + struct regpair fcoe_tx_data_pkt_cnt; + struct regpair fcoe_tx_xfer_pkt_cnt; + struct regpair fcoe_tx_other_pkt_cnt; +}; + +struct fcoe_wqe { + __le16 task_id; + __le16 flags; +#define FCOE_WQE_REQ_TYPE_MASK 0xF +#define FCOE_WQE_REQ_TYPE_SHIFT 0 +#define FCOE_WQE_SGL_MODE_MASK 0x7 +#define FCOE_WQE_SGL_MODE_SHIFT 4 +#define FCOE_WQE_CONTINUATION_MASK 0x1 +#define FCOE_WQE_CONTINUATION_SHIFT 7 +#define FCOE_WQE_INVALIDATE_PTU_MASK 0x1 +#define FCOE_WQE_INVALIDATE_PTU_SHIFT 8 +#define FCOE_WQE_SUPER_IO_MASK 0x1 +#define FCOE_WQE_SUPER_IO_SHIFT 9 +#define FCOE_WQE_SEND_AUTO_RSP_MASK 0x1 +#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 10 +#define FCOE_WQE_RESERVED0_MASK 0x1F +#define FCOE_WQE_RESERVED0_SHIFT 11 + union fcoe_additional_info_union additional_info_union; +}; + +struct xfrqe_prot_flags { + u8 flags; +#define XFRQE_PROT_FLAGS_PROT_INTERVAL_SIZE_LOG_MASK 0xF +#define XFRQE_PROT_FLAGS_PROT_INTERVAL_SIZE_LOG_SHIFT 0 +#define XFRQE_PROT_FLAGS_DIF_TO_PEER_MASK 0x1 +#define XFRQE_PROT_FLAGS_DIF_TO_PEER_SHIFT 4 +#define XFRQE_PROT_FLAGS_HOST_INTERFACE_MASK 0x3 +#define XFRQE_PROT_FLAGS_HOST_INTERFACE_SHIFT 5 +#define XFRQE_PROT_FLAGS_RESERVED_MASK 0x1 +#define XFRQE_PROT_FLAGS_RESERVED_SHIFT 7 +}; + +struct fcoe_db_data { + u8 params; +#define FCOE_DB_DATA_DEST_MASK 0x3 +#define FCOE_DB_DATA_DEST_SHIFT 0 +#define FCOE_DB_DATA_AGG_CMD_MASK 0x3 +#define FCOE_DB_DATA_AGG_CMD_SHIFT 2 +#define FCOE_DB_DATA_BYPASS_EN_MASK 0x1 +#define FCOE_DB_DATA_BYPASS_EN_SHIFT 4 +#define FCOE_DB_DATA_RESERVED_MASK 0x1 +#define FCOE_DB_DATA_RESERVED_SHIFT 5 +#define FCOE_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define FCOE_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 sq_prod; +}; +#endif /* __FCOE_COMMON__ */ diff --git a/include/linux/qed/qed_fcoe_if.h b/include/linux/qed/qed_fcoe_if.h new file mode 100644 index 000000000000..bd6bcb809415 --- /dev/null +++ b/include/linux/qed/qed_fcoe_if.h @@ -0,0 +1,145 @@ +#ifndef _QED_FCOE_IF_H +#define _QED_FCOE_IF_H +#include +#include +struct qed_fcoe_stats { + u64 fcoe_rx_byte_cnt; + u64 fcoe_rx_data_pkt_cnt; + u64 fcoe_rx_xfer_pkt_cnt; + u64 fcoe_rx_other_pkt_cnt; + u32 fcoe_silent_drop_pkt_cmdq_full_cnt; + u32 fcoe_silent_drop_pkt_rq_full_cnt; + u32 fcoe_silent_drop_pkt_crc_error_cnt; + u32 fcoe_silent_drop_pkt_task_invalid_cnt; + u32 fcoe_silent_drop_total_pkt_cnt; + + u64 fcoe_tx_byte_cnt; + u64 fcoe_tx_data_pkt_cnt; + u64 fcoe_tx_xfer_pkt_cnt; + u64 fcoe_tx_other_pkt_cnt; +}; + +struct qed_dev_fcoe_info { + struct qed_dev_info common; + + void __iomem *primary_dbq_rq_addr; + void __iomem *secondary_bdq_rq_addr; +}; + +struct qed_fcoe_params_offload { + dma_addr_t sq_pbl_addr; + dma_addr_t sq_curr_page_addr; + dma_addr_t sq_next_page_addr; + + u8 src_mac[ETH_ALEN]; + u8 dst_mac[ETH_ALEN]; + + u16 tx_max_fc_pay_len; + u16 e_d_tov_timer_val; + u16 rec_tov_timer_val; + u16 rx_max_fc_pay_len; + u16 vlan_tag; + + struct fc_addr_nw s_id; + u8 max_conc_seqs_c3; + struct fc_addr_nw d_id; + u8 flags; + u8 def_q_idx; +}; + +#define MAX_TID_BLOCKS_FCOE (512) +struct qed_fcoe_tid { + u32 size; /* In bytes per task */ + u32 num_tids_per_block; + u8 *blocks[MAX_TID_BLOCKS_FCOE]; +}; + +struct qed_fcoe_cb_ops { + struct qed_common_cb_ops common; + u32 (*get_login_failures)(void *cookie); +}; + +void qed_fcoe_set_pf_params(struct qed_dev *cdev, + struct qed_fcoe_pf_params *params); + +/** + * struct qed_fcoe_ops - qed FCoE operations. + * @common: common operations pointer + * @fill_dev_info: fills FCoE specific information + * @param cdev + * @param info + * @return 0 on sucesss, otherwise error value. + * @register_ops: register FCoE operations + * @param cdev + * @param ops - specified using qed_iscsi_cb_ops + * @param cookie - driver private + * @ll2: light L2 operations pointer + * @start: fcoe in FW + * @param cdev + * @param tasks - qed will fill information about tasks + * return 0 on success, otherwise error value. + * @stop: stops fcoe in FW + * @param cdev + * return 0 on success, otherwise error value. + * @acquire_conn: acquire a new fcoe connection + * @param cdev + * @param handle - qed will fill handle that should be + * used henceforth as identifier of the + * connection. + * @param p_doorbell - qed will fill the address of the + * doorbell. + * return 0 on sucesss, otherwise error value. + * @release_conn: release a previously acquired fcoe connection + * @param cdev + * @param handle - the connection handle. + * return 0 on success, otherwise error value. + * @offload_conn: configures an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param conn_info - the configuration to use for the + * offload. + * return 0 on success, otherwise error value. + * @destroy_conn: stops an offloaded connection + * @param cdev + * @param handle - the connection handle. + * @param terminate_params + * return 0 on success, otherwise error value. + * @get_stats: gets FCoE related statistics + * @param cdev + * @param stats - pointer to struck that would be filled + * we stats + * return 0 on success, error otherwise. + */ +struct qed_fcoe_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_fcoe_info *info); + + void (*register_ops)(struct qed_dev *cdev, + struct qed_fcoe_cb_ops *ops, void *cookie); + + const struct qed_ll2_ops *ll2; + + int (*start)(struct qed_dev *cdev, struct qed_fcoe_tid *tasks); + + int (*stop)(struct qed_dev *cdev); + + int (*acquire_conn)(struct qed_dev *cdev, + u32 *handle, + u32 *fw_cid, void __iomem **p_doorbell); + + int (*release_conn)(struct qed_dev *cdev, u32 handle); + + int (*offload_conn)(struct qed_dev *cdev, + u32 handle, + struct qed_fcoe_params_offload *conn_info); + int (*destroy_conn)(struct qed_dev *cdev, + u32 handle, dma_addr_t terminate_params); + + int (*get_stats)(struct qed_dev *cdev, struct qed_fcoe_stats *stats); +}; + +const struct qed_fcoe_ops *qed_get_fcoe_ops(void); +void qed_put_fcoe_ops(void); +#endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d1576a2bcfc9..fde56c436f71 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -59,7 +59,6 @@ enum dcbx_protocol_type { #define QED_ROCE_PROTOCOL_INDEX (3) -#ifdef CONFIG_DCB #define QED_LLDP_CHASSIS_ID_STAT_LEN 4 #define QED_LLDP_PORT_ID_STAT_LEN 4 #define QED_DCBX_MAX_APP_PROTOCOL 32 @@ -155,7 +154,6 @@ struct qed_dcbx_get { struct qed_dcbx_remote_params remote; struct qed_dcbx_admin_params local; }; -#endif enum qed_led_mode { QED_LED_MODE_OFF, @@ -182,6 +180,38 @@ struct qed_eth_pf_params { u16 num_cons; }; +struct qed_fcoe_pf_params { + /* The following parameters are used during protocol-init */ + u64 glbl_q_params_addr; + u64 bdq_pbl_base_addr[2]; + + /* The following parameters are used during HW-init + * and these parameters need to be passed as arguments + * to update_pf_params routine invoked before slowpath start + */ + u16 num_cons; + u16 num_tasks; + + /* The following parameters are used during protocol-init */ + u16 sq_num_pbl_pages; + + u16 cq_num_entries; + u16 cmdq_num_entries; + u16 rq_buffer_log_size; + u16 mtu; + u16 dummy_icid; + u16 bdq_xoff_threshold[2]; + u16 bdq_xon_threshold[2]; + u16 rq_buffer_size; + u8 num_cqs; /* num of global CQs */ + u8 log_page_size; + u8 gl_rq_pi; + u8 gl_cmd_pi; + u8 debug_mode; + u8 is_target; + u8 bdq_pbl_num_entries[2]; +}; + /* Most of the the parameters below are described in the FW iSCSI / TCP HSI */ struct qed_iscsi_pf_params { u64 glbl_q_params_addr; @@ -245,6 +275,7 @@ struct qed_rdma_pf_params { struct qed_pf_params { struct qed_eth_pf_params eth_pf_params; + struct qed_fcoe_pf_params fcoe_pf_params; struct qed_iscsi_pf_params iscsi_pf_params; struct qed_rdma_pf_params rdma_pf_params; }; @@ -305,6 +336,7 @@ enum qed_sb_type { enum qed_protocol { QED_PROTOCOL_ETH, QED_PROTOCOL_ISCSI, + QED_PROTOCOL_FCOE, }; enum qed_link_mode_bits { @@ -391,6 +423,7 @@ struct qed_int_info { struct qed_common_cb_ops { void (*link_update)(void *dev, struct qed_link_output *link); + void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); }; struct qed_selftest_ops { @@ -494,6 +527,10 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + int (*dbg_grc)(struct qed_dev *cdev, + void *buffer, u32 *num_dumped_bytes); + + int (*dbg_grc_size)(struct qed_dev *cdev); int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); -- cgit v1.2.3 From bd4b9f8b4af7be15fd162276ec9a2a1d49f10270 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Feb 2017 12:45:37 +0800 Subject: sctp: add support for generating stream reconf resp chunk This patch is to define Re-configuration Response Parameter described in rfc6525 section 4.4. As optional fields are only for SSN/TSN Reset Request Parameter, it uses another function to make that. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index b055788de0cf..7a4804c4a593 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -749,4 +749,28 @@ struct sctp_strreset_addstrm { __u16 reserved; }; +enum { + SCTP_STRRESET_NOTHING_TO_DO = 0x00, + SCTP_STRRESET_PERFORMED = 0x01, + SCTP_STRRESET_DENIED = 0x02, + SCTP_STRRESET_ERR_WRONG_SSN = 0x03, + SCTP_STRRESET_ERR_IN_PROGRESS = 0x04, + SCTP_STRRESET_ERR_BAD_SEQNO = 0x05, + SCTP_STRRESET_IN_PROGRESS = 0x06, +}; + +struct sctp_strreset_resp { + sctp_paramhdr_t param_hdr; + __u32 response_seq; + __u32 result; +}; + +struct sctp_strreset_resptsn { + sctp_paramhdr_t param_hdr; + __u32 response_seq; + __u32 result; + __u32 senders_next_tsn; + __u32 receivers_next_tsn; +}; + #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From d24cdcd3e40a6825135498e11c20c7976b9bf545 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 12:06:09 +0100 Subject: libceph: use BUG() instead of BUG_ON(1) I ran into this compile warning, which is the result of BUG_ON(1) not always leading to the compiler treating the code path as unreachable: include/linux/ceph/osdmap.h: In function 'ceph_can_shift_osds': include/linux/ceph/osdmap.h:62:1: error: control reaches end of non-void function [-Werror=return-type] Using BUG() here avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9a9041784dcf..412906609954 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } -- cgit v1.2.3 From 66a0e2d579dbec5c676cfe446234ffebb267c564 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: crush: remove mutable part of CRUSH map Then add it to the working state. It would be very nice if we didn't have to take a lock to calculate a crush placement. By moving the permutation array into the working data, we can treat the CRUSH map as immutable. Reflects ceph.git commit cbcd039651c0569551cb90d26ce27e1432671f2a. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 1 + include/linux/crush/crush.h | 41 ++++++++++++++++++++++++++++++++++------- include/linux/crush/mapper.h | 4 +++- 3 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 412906609954..cef1cab789b9 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -175,6 +175,7 @@ struct ceph_osdmap { struct mutex crush_scratch_mutex; int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + void *crush_workspace; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index be8f12b8f195..fbecbd089d75 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -135,13 +135,6 @@ struct crush_bucket { __u32 size; /* num items */ __s32 *items; - /* - * cached random permutation: used for uniform bucket and for - * the linear search fallback for the other bucket types. - */ - __u32 perm_x; /* @x for which *perm is defined */ - __u32 perm_n; /* num elements of *perm that are permuted/defined */ - __u32 *perm; }; struct crush_bucket_uniform { @@ -211,6 +204,21 @@ struct crush_map { * device fails. */ __u8 chooseleaf_stable; + /* + * This value is calculated after decode or construction by + * the builder. It is exposed here (rather than having a + * 'build CRUSH working space' function) so that callers can + * reserve a static buffer, allocate space on the stack, or + * otherwise avoid calling into the heap allocator if they + * want to. The size of the working space depends on the map, + * while the size of the scratch vector passed to the mapper + * depends on the size of the desired result set. + * + * Nothing stops the caller from allocating both in one swell + * foop and passing in two points, though. + */ + size_t working_size; + #ifndef __KERNEL__ /* * version 0 (original) of straw_calc has various flaws. version 1 @@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i) return ((i+1) << 1)-1; } +/* + * These data structures are private to the CRUSH implementation. They + * are exposed in this header file because builder needs their + * definitions to calculate the total working size. + * + * Moving this out of the crush map allow us to treat the CRUSH map as + * immutable within the mapper and removes the requirement for a CRUSH + * map lock. + */ +struct crush_work_bucket { + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; /* Permutation of the bucket's items */ +}; + +struct crush_work { + struct crush_work_bucket **work; /* Per-bucket working store */ +}; + #endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 5dfd5b1125d2..3303c7fd8a31 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,6 +15,8 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - int *scratch); + void *cwin, int *scratch); + +void crush_init_workspace(const struct crush_map *map, void *v); #endif -- cgit v1.2.3 From 743efcffffc6620ab44ea9ec67c7e4e28dfa7742 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: crush: merge working data and scratch Much like Arlo Guthrie, I decided that one big pile is better than two little piles. Reflects ceph.git commit 95c2df6c7e0b22d2ea9d91db500cf8b9441c73ba. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 3 +-- include/linux/crush/mapper.h | 14 +++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index cef1cab789b9..8cebdc4158c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -173,8 +173,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_scratch_mutex; - int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + struct mutex crush_workspace_mutex; void *crush_workspace; }; diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 3303c7fd8a31..c95e19e1ff11 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,7 +15,19 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - void *cwin, int *scratch); + void *cwin); + +/* + * Returns the exact amount of workspace that will need to be used + * for a given combination of crush_map and result_max. The caller can + * then allocate this much on its own, either on the stack, in a + * per-thread long-lived buffer, or however it likes. + */ +static inline size_t crush_work_size(const struct crush_map *map, + int result_max) +{ + return map->working_size + result_max * 3 * sizeof(__u32); +} void crush_init_workspace(const struct crush_map *map, void *v); -- cgit v1.2.3 From 083a51fbc57ca848ab087692f3cc97898fd88b54 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Feb 2017 16:14:52 +0100 Subject: libceph: bump CEPH_PG_MAX_SIZE to 32 ... to accommodate potentially very wide EC pools. This increases the size of a typical rbd ceph_osd_request by ~12% (from 1040 to 1168 bytes), but I'd rather go future proof here. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/rados.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5c0da61cb763..5d0018782d50 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -50,7 +50,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 -#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ +#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */ /* * placement group. -- cgit v1.2.3 From 6c696d8560e74cd42458931375875d62ae88c6ae Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: rbd: kill obj_request->object_name and rbd_segment_name_cache Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- include/linux/ceph/osdmap.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8cebdc4158c3..938656f70807 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest, const struct ceph_object_locator *src); void ceph_oloc_destroy(struct ceph_object_locator *oloc); -/* - * Maximum supported by kernel client object name length - * - * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100) - */ -#define CEPH_MAX_OID_NAME_LEN 100 - /* * 51-char inline_name is long enough for all cephfs and all but one * rbd requests: in ".rbd"/"rbd_id." can be -- cgit v1.2.3 From 42f82367df2cb2b71ceead6164e4415851c51fa4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 20 Feb 2017 13:51:07 +0100 Subject: video: fbdev: fsl-diu-fb: fix spelling mistake "palette" trivial fix to spelling mistakes of "palette" Signed-off-by: Colin Ian King Acked-by: Timur Tabi Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/fsl-diu-fb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h index a1e8277120c7..c46eab5bc893 100644 --- a/include/linux/fsl-diu-fb.h +++ b/include/linux/fsl-diu-fb.h @@ -73,7 +73,7 @@ struct diu_ad { /* Word 0(32-bit) in DDR memory */ /* __u16 comp; */ /* __u16 pixel_s:2; */ -/* __u16 pallete:1; */ +/* __u16 palette:1; */ /* __u16 red_c:2; */ /* __u16 green_c:2; */ /* __u16 blue_c:2; */ @@ -142,7 +142,7 @@ struct diu_ad { struct diu { __be32 desc[3]; __be32 gamma; - __be32 pallete; + __be32 palette; __be32 cursor; __be32 curs_pos; __be32 diu_mode; -- cgit v1.2.3 From 25149ef9d25cafc4f4fe9f4461f18f876f397417 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 17 Feb 2017 16:07:34 -0800 Subject: net: phy: Check phydev->drv There are number of function calls, originating from user-space, typically through the Ethernet driver that can make us crash by dereferencing phydev->drv which will be NULL once we unbind the driver from the PHY. There are still functional issues that prevent an unbind then rebind to work, but these will be addressed separately. Suggested-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d9bdf53e0514..772476028a65 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -807,6 +807,9 @@ int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { + if (!phydev->drv) + return -EIO; + return phydev->drv->read_status(phydev); } -- cgit v1.2.3 From e71695307114335be1ed912f4a347396c2ed0e69 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 19 Feb 2017 07:17:17 +0200 Subject: ptr_ring: fix race conditions when resizing Resizing currently drops consumer lock. This can cause entries to be reordered, which isn't good in itself. More importantly, consumer can detect a false ring empty condition and block forever. Further, nesting of consumer within producer lock is problematic for tun, since it produces entries in a BH, which causes a lock order reversal: CPU0 CPU1 ---- ---- consume: lock(&(&r->consumer_lock)->rlock); resize: local_irq_disable(); lock(&(&r->producer_lock)->rlock); lock(&(&r->consumer_lock)->rlock); produce: lock(&(&r->producer_lock)->rlock); To fix, nest producer lock within consumer lock during resize, and keep consumer lock during the whole swap operation. Reported-by: Dmitry Vyukov Cc: stable@vger.kernel.org Cc: "David S. Miller" Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 2052011bf9fb..6c70444da3b9 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -111,6 +111,11 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) return 0; } +/* + * Note: resize (below) nests producer lock within consumer lock, so if you + * consume in interrupt or BH context, you must disable interrupts/BH when + * calling this. + */ static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr) { int ret; @@ -242,6 +247,11 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) return ptr; } +/* + * Note: resize (below) nests producer lock within consumer lock, so if you + * call this in interrupt or BH context, you must disable interrupts/BH when + * producing. + */ static inline void *ptr_ring_consume(struct ptr_ring *r) { void *ptr; @@ -357,7 +367,7 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, void **old; void *ptr; - while ((ptr = ptr_ring_consume(r))) + while ((ptr = __ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; else if (destroy) @@ -372,6 +382,12 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, return old; } +/* + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { @@ -382,17 +398,25 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, if (!queue) return -ENOMEM; - spin_lock_irqsave(&(r)->producer_lock, flags); + spin_lock_irqsave(&(r)->consumer_lock, flags); + spin_lock(&(r)->producer_lock); old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); - spin_unlock_irqrestore(&(r)->producer_lock, flags); + spin_unlock(&(r)->producer_lock); + spin_unlock_irqrestore(&(r)->consumer_lock, flags); kfree(old); return 0; } +/* + * Note: producer lock is nested within consumer lock, so if you + * resize you must make sure all uses nest correctly. + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. + */ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, int size, gfp_t gfp, void (*destroy)(void *)) @@ -412,10 +436,12 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, } for (i = 0; i < nrings; ++i) { - spin_lock_irqsave(&(rings[i])->producer_lock, flags); + spin_lock_irqsave(&(rings[i])->consumer_lock, flags); + spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); - spin_unlock_irqrestore(&(rings[i])->producer_lock, flags); + spin_unlock(&(rings[i])->producer_lock); + spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); } for (i = 0; i < nrings; ++i) -- cgit v1.2.3 From bb7462b6fd64e40809a857223bf7f0e628969f87 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: vfs: use helpers for calling f_op->{read,write}_iter() Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 78c81e6f5d76..a3145cdff8f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1715,6 +1715,18 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- cgit v1.2.3 From f74ac01520c9f6d89bbc3c6931a72f757b742f86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: mm: use helper for calling f_op->mmap() Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a3145cdff8f2..93691b59e476 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,6 +1727,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, return file->f_op->write_iter(kio, iter); } +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- cgit v1.2.3 From 0eb8af4916a540c362a2950e5ab54eca32eb7d58 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: vfs: use helper for calling f_op->fsync() Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 93691b59e476..72a33007ec24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,6 +1732,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- cgit v1.2.3 From 3498d8694d41af701c51289e7caf3ffefc7bfb6b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Feb 2017 14:19:45 +0100 Subject: gpio: reintroduce devm_get_gpiod_from_child() We need to keep this API around for the merge window to avoid nasty build problems in the merges. Cc: Lee Jones Suggested-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ea9b01d40017..2484b2fcc6eb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,6 +143,16 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); +/* FIXME: delete this helper when users are switched over */ +static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, + const char *con_id, struct fwnode_handle *child) +{ + return devm_fwnode_get_index_gpiod_from_child(dev, con_id, + 0, child, + GPIOD_ASIS, + "?"); +} + #else /* CONFIG_GPIOLIB */ static inline int gpiod_count(struct device *dev, const char *con_id) @@ -434,6 +444,13 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } +/* FIXME: delete this when all users are switched over */ +static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, + const char *con_id, struct fwnode_handle *child) +{ + return ERR_PTR(-ENOSYS); +} + #endif /* CONFIG_GPIOLIB */ static inline -- cgit v1.2.3 From 9d876e79df6a2f364b9f2737eacd72ceb27da53a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Feb 2017 16:09:34 +0100 Subject: bpf: fix unlocking of jited image when module ronx not set Eric and Willem reported that they recently saw random crashes when JIT was in use and bisected this to 74451e66d516 ("bpf: make jited programs visible in traces"). Issue was that the consolidation part added bpf_jit_binary_unlock_ro() that would unlock previously made read-only memory back to read-write. However, DEBUG_SET_MODULE_RONX cannot be used for this to test for presence of set_memory_*() functions. We need to use ARCH_HAS_SET_MEMORY instead to fix this; also add the corresponding bpf_jit_binary_lock_ro() to filter.h. Fixes: 74451e66d516 ("bpf: make jited programs visible in traces") Reported-by: Eric Dumazet Reported-by: Willem de Bruijn Bisected-by: Eric Dumazet Signed-off-by: Daniel Borkmann Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/filter.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c1cc9143cb2..0c167fdee5f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -551,7 +551,7 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -#ifdef CONFIG_DEBUG_SET_MODULE_RONX +#ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { set_memory_ro((unsigned long)fp, fp->pages); @@ -562,6 +562,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) set_memory_rw((unsigned long)fp, fp->pages); } +static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) +{ + set_memory_ro((unsigned long)hdr, hdr->pages); +} + static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { set_memory_rw((unsigned long)hdr, hdr->pages); @@ -575,10 +580,14 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } +static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) +{ +} + static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { } -#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +#endif /* CONFIG_ARCH_HAS_SET_MEMORY */ static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) -- cgit v1.2.3 From 0500ce589aa7b5325af161d3c992ffb6be138ff9 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 15 Feb 2017 09:35:27 -0700 Subject: rtc: m48t86: remove unused platform_data All users of this driver have been updated to allow the driver to manage it's own resources and do the read/write operations internally. The m48t86_ops are no longer used. Remove the platform_data header and the support code in the driver. Signed-off-by: H Hartley Sweeten Signed-off-by: Alexandre Belloni --- include/linux/platform_data/rtc-m48t86.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/platform_data/rtc-m48t86.h (limited to 'include/linux') diff --git a/include/linux/platform_data/rtc-m48t86.h b/include/linux/platform_data/rtc-m48t86.h deleted file mode 100644 index 915d6b4f0f89..000000000000 --- a/include/linux/platform_data/rtc-m48t86.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * ST M48T86 / Dallas DS12887 RTC driver - * Copyright (c) 2006 Tower Technologies - * - * Author: Alessandro Zummo - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -struct m48t86_ops -{ - void (*writebyte)(unsigned char value, unsigned long addr); - unsigned char (*readbyte)(unsigned long addr); -}; -- cgit v1.2.3 From 0ae060ca2bdfe11181094699b0f76437ec210b17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:25 -0500 Subject: SUNRPC: Add generic helpers for xdr_stream encode/decode Add some generic helpers for encoding/decoding opaque structures and basic u32/u64. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 177 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 56c48c884a24..dc7e51a769ac 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,6 +242,183 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +/** + * xdr_align_size - Calculate padded size of an object + * @n: Size of an object being XDR encoded (in bytes) + * + * Return value: + * Size (in bytes) of the object including xdr padding + */ +static inline size_t +xdr_align_size(size_t n) +{ + const size_t mask = sizeof(__u32) - 1; + + return (n + mask) & ~mask; +} + +/** + * xdr_stream_encode_u32 - Encode a 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = cpu_to_be32(n); + return len; +} + +/** + * xdr_stream_encode_u64 - Encode a 64-bit integer + * @xdr: pointer to xdr_stream + * @n: 64-bit integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_hyper(p, n); + return len; +} + +/** + * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque_fixed(p, ptr, len); + return xdr_align_size(len); +} + +/** + * xdr_stream_encode_opaque - Encode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque(p, ptr, len); + return count; +} + +/** + * xdr_stream_decode_u32 - Decode a 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = be32_to_cpup(p); + return 0; +} + +/** + * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store data + * @len: size of buffer pointed to by @ptr + * + * Return values: + * On success, returns size of object stored in @ptr + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) +{ + __be32 *p = xdr_inline_decode(xdr, len); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_opaque_fixed(p, ptr, len); + return len; +} + +/** + * xdr_stream_decode_opaque_inline - Decode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store pointer to opaque data + * @maxlen: maximum acceptable object size + * + * Note: the pointer stored in @ptr cannot be assumed valid after the XDR + * buffer has been destroyed, or even after calling xdr_inline_decode() + * on @xdr. It is therefore expected that the object it points to should + * be processed immediately. + * + * Return values: + * On success, returns size of object stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the object would exceed @maxlen + */ +static inline ssize_t +xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxlen) +{ + __be32 *p; + __u32 len; + + *ptr = NULL; + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EBADMSG; + if (unlikely(len > maxlen)) + return -EMSGSIZE; + *ptr = p; + } + return len; +} #endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ -- cgit v1.2.3 From 5c741d4f2215371ae17e3dbdf3d46da850662e13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:31 -0500 Subject: SUNRPC: Add a helper function xdr_stream_decode_string_dup() Create a helper function that decodes a xdr string object, allocates a memory buffer and then store it as a NUL terminated string. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index dc7e51a769ac..054c8cde18f3 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,6 +242,8 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, + size_t maxlen, gfp_t gfp_flags); /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) -- cgit v1.2.3 From 986994a27587efd8ce4c595cab89b570f7475359 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 26 Jan 2017 17:17:28 +0200 Subject: nvme: Use CNS as 8-bit field and avoid endianness conversion This patch defines CNS field as 8-bit field and avoids cpu_to/from_le conversions. Also initialize nvme_command cns value explicitly to NVME_ID_CNS_NS for readability (don't rely on the fact that NVME_ID_CNS_NS = 0). Reviewed-by: Max Gurtovoy Signed-off-by: Parav Pandit Reviewed-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0b676a02cf3e..5b32521456d6 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -644,7 +644,9 @@ struct nvme_identify { __le32 nsid; __u64 rsvd2[2]; union nvme_data_ptr dptr; - __le32 cns; + __u8 cns; + __u8 rsvd3; + __le16 ctrlid; __u32 rsvd11[5]; }; -- cgit v1.2.3 From c5552fde102fcc3f2cf9e502b8ac90e3500d8fdf Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 7 Feb 2017 10:08:45 -0800 Subject: nvme: Enable autonomous power state transitions NVMe devices can advertise multiple power states. These states can be either "operational" (the device is fully functional but possibly slow) or "non-operational" (the device is asleep until woken up). Some devices can automatically enter a non-operational state when idle for a specified amount of time and then automatically wake back up when needed. The hardware configuration is a table. For each state, an entry in the table indicates the next deeper non-operational state, if any, to autonomously transition to and the idle time required before transitioning. This patch teaches the driver to program APST so that each successive non-operational state will be entered after an idle time equal to 100% of the total latency (entry plus exit) associated with that state. The maximum acceptable latency is controlled using dev_pm_qos (e.g. power/pm_qos_latency_tolerance_us in sysfs); non-operational states with total latency greater than this value will not be used. As a special case, setting the latency tolerance to 0 will disable APST entirely. On hardware without APST support, the sysfs file will not be exposed. The latency tolerance for newly-probed devices is set by the module parameter nvme_core.default_ps_max_latency_us. In theory, the device can expose "default" APST table, but this doesn't seem to function correctly on my device (Samsung 950), nor does it seem particularly useful. There is also an optional mechanism by which a configuration can be "saved" so it will be automatically loaded on reset. This can be configured from userspace, but it doesn't seem useful to support in the driver. On my laptop, enabling APST seems to save nearly 1W. The hardware tables can be decoded in userspace with nvme-cli. 'nvme id-ctrl /dev/nvmeN' will show the power state table and 'nvme get-feature -f 0x0c -H /dev/nvme0' will show the current APST configuration. This feature is quirked off on a known-buggy Samsung device. Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5b32521456d6..c43d435d4225 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -579,6 +579,12 @@ struct nvme_write_zeroes_cmd { __le16 appmask; }; +/* Features */ + +struct nvme_feat_auto_pst { + __le64 entries[32]; +}; + /* Admin commands */ enum nvme_admin_opcode { -- cgit v1.2.3 From 3ee80c3d15b61e61611903033df414a0590cfde9 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 20 Feb 2017 13:44:28 +0200 Subject: nvme-rdma: move nvme cm status helper to .h file This will enable the usage for nvme rdma target. Also move from a lookup array to a switch statement. Signed-off-by: Max Gurtovoy Reviewed-by: Parav Pandit Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme-rdma.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index bf240a3cbf99..a72fd04aa5e1 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -29,6 +29,30 @@ enum nvme_rdma_cm_status { NVME_RDMA_CM_INVALID_ORD = 0x08, }; +static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) +{ + switch (status) { + case NVME_RDMA_CM_INVALID_LEN: + return "invalid length"; + case NVME_RDMA_CM_INVALID_RECFMT: + return "invalid record format"; + case NVME_RDMA_CM_INVALID_QID: + return "invalid queue ID"; + case NVME_RDMA_CM_INVALID_HSQSIZE: + return "invalid host SQ size"; + case NVME_RDMA_CM_INVALID_HRQSIZE: + return "invalid host RQ size"; + case NVME_RDMA_CM_NO_RSC: + return "resource not found"; + case NVME_RDMA_CM_INVALID_IRD: + return "invalid IRD"; + case NVME_RDMA_CM_INVALID_ORD: + return "Invalid ORD"; + default: + return "unrecognized reason"; + } +} + /** * struct nvme_rdma_cm_req - rdma connect request * -- cgit v1.2.3 From d3213e8fd4b0f18dfd438268ff480406ba743abb Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:39:47 -0800 Subject: tracing: add __print_flags_u64() Patch series "DAX tracepoints, mm argument simplification", v4. This contains both my DAX tracepoint code and Dave Jiang's MM argument simplifications. Dave's code was written with my tracepoint code as a baseline, so it seemed simplest to keep them together in a single series. This patch (of 7): Add __print_flags_u64() and the helper trace_print_flags_seq_u64() in the same spirit as __print_symbolic_u64() and trace_print_symbols_seq_u64(). These functions allow us to print symbols associated with flags that are 64 bits wide even on 32 bit machines. These will be used by the DAX code so that we can print the flags set in a pfn_t such as PFN_SG_CHAIN, PFN_SG_LAST, PFN_DEV and PFN_MAP. Without this new function I was getting errors like the following when compiling for i386: include/linux/pfn_t.h:13:22: warning: large integer implicitly truncated to unsigned type [-Woverflow] #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1)) ^ Link: http://lkml.kernel.org/r/1484085142-2297-2-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Steven Rostedt Cc: Dave Chinner Cc: Dave Jiang Cc: Jan Kara Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/trace_events.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0f165507495c..0af63c4381b9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -23,6 +23,10 @@ const char *trace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); #if BITS_PER_LONG == 32 +const char *trace_print_flags_seq_u64(struct trace_seq *p, const char *delim, + unsigned long long flags, + const struct trace_print_flags_u64 *flag_array); + const char *trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 -- cgit v1.2.3 From 282a8e0391c377b64c7d065b18fc2f6267a24dad Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:39:50 -0800 Subject: dax: add tracepoint infrastructure, PMD tracing Tracepoints are the standard way to capture debugging and tracing information in many parts of the kernel, including the XFS and ext4 filesystems. Create a tracepoint header for FS DAX and add the first DAX tracepoints to the PMD fault handler. This allows the tracing for DAX to be done in the same way as the filesystem tracing so that developers can look at them together and get a coherent idea of what the system is doing. I added both an entry and exit tracepoint because future patches will add tracepoints to child functions of dax_iomap_pmd_fault() like dax_pmd_load_hole() and dax_pmd_insert_mapping(). We want those messages to be wrapped by the parent function tracepoints so the code flow is more easily understood. Having entry and exit tracepoints for faults also allows us to easily see what filesystems functions were called during the fault. These filesystem functions get executed via iomap_begin() and iomap_end() calls, for example, and will have their own tracepoints. For PMD faults we primarily want to understand the type of mapping, the fault flags, the faulting address and whether it fell back to 4k faults. If it fell back to 4k faults the tracepoints should let us understand why. I named the new tracepoint header file "fs_dax.h" to allow for device DAX to have its own separate tracing header in the same directory at some point. Here is an example output for these events from a successful PMD fault: big-1441 [005] .... 32.582758: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003 big-1441 [005] .... 32.582776: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 big-1441 [005] .... 32.583292: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE Link: http://lkml.kernel.org/r/1484085142-2297-3-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Suggested-by: Dave Chinner Reviewed-by: Jan Kara Acked-by: Steven Rostedt Cc: Dave Jiang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6ff66d6fe8e2..d22f7837ad90 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,6 +285,17 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +#define FAULT_FLAG_TRACE \ + { FAULT_FLAG_WRITE, "WRITE" }, \ + { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ + { FAULT_FLAG_ALLOW_RETRY, "ALLOW_RETRY" }, \ + { FAULT_FLAG_RETRY_NOWAIT, "RETRY_NOWAIT" }, \ + { FAULT_FLAG_KILLABLE, "KILLABLE" }, \ + { FAULT_FLAG_TRIED, "TRIED" }, \ + { FAULT_FLAG_USER, "USER" }, \ + { FAULT_FLAG_REMOTE, "REMOTE" }, \ + { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" } + /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask @@ -1111,6 +1122,20 @@ static inline void clear_page_pfmemalloc(struct page *page) VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ VM_FAULT_FALLBACK) +#define VM_FAULT_RESULT_TRACE \ + { VM_FAULT_OOM, "OOM" }, \ + { VM_FAULT_SIGBUS, "SIGBUS" }, \ + { VM_FAULT_MAJOR, "MAJOR" }, \ + { VM_FAULT_WRITE, "WRITE" }, \ + { VM_FAULT_HWPOISON, "HWPOISON" }, \ + { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ + { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ + { VM_FAULT_NOPAGE, "NOPAGE" }, \ + { VM_FAULT_LOCKED, "LOCKED" }, \ + { VM_FAULT_RETRY, "RETRY" }, \ + { VM_FAULT_FALLBACK, "FALLBACK" }, \ + { VM_FAULT_DONE_COW, "DONE_COW" } + /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) -- cgit v1.2.3 From 27a7ffaccd915675c88045ba83493804a0267ab7 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 22 Feb 2017 15:40:00 -0800 Subject: dax: add tracepoints to dax_pmd_insert_mapping() Add tracepoints to dax_pmd_insert_mapping(), following the same logging conventions as the tracepoints in dax_iomap_pmd_fault(). Here is an example PMD fault showing the new tracepoints: big-1504 [001] .... 326.960743: xfs_filemap_pmd_fault: dev 259:0 ino 0x1003 big-1504 [001] .... 326.960753: dax_pmd_fault: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 big-1504 [001] .... 326.960981: dax_pmd_insert_mapping: dev 259:0 ino 0x1003 shared write address 0x10505000 length 0x200000 pfn 0x100600 DEV|MAP radix_entry 0xc000e big-1504 [001] .... 326.960986: dax_pmd_fault_done: dev 259:0 ino 0x1003 shared WRITE|ALLOW_RETRY|KILLABLE|USER address 0x10505000 vm_start 0x10200000 vm_end 0x10700000 pgoff 0x200 max_pgoff 0x1400 NOPAGE Link: http://lkml.kernel.org/r/1484085142-2297-6-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Acked-by: Steven Rostedt Cc: Dave Chinner Cc: Dave Jiang Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pfn_t.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index a3d90b9da18d..033fc7bbcefa 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,6 +15,12 @@ #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_FLAGS_TRACE \ + { PFN_SG_CHAIN, "SG_CHAIN" }, \ + { PFN_SG_LAST, "SG_LAST" }, \ + { PFN_DEV, "DEV" }, \ + { PFN_MAP, "MAP" } + static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, u64 flags) { pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), }; -- cgit v1.2.3 From d8a849e1bc123790bbbf1facba94452a3aef5736 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 22 Feb 2017 15:40:03 -0800 Subject: mm, dax: make pmd_fault() and friends be the same as fault() Instead of passing in multiple parameters in the pmd_fault() handler, a vmf can be passed in just like a fault() handler. This will simplify code and remove the need for the actual pmd fault handlers to allocate a vmf. Related functions are also modified to do the same. [dave.jiang@intel.com: fix issue with xfs_tests stall when DAX option is off] Link: http://lkml.kernel.org/r/148469861071.195597.3619476895250028518.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/1484085142-2297-7-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 7 +++---- include/linux/mm.h | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 24ad71173995..a829fee2b42b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -71,16 +71,15 @@ static inline unsigned int dax_radix_order(void *entry) return PMD_SHIFT - PAGE_SHIFT; return 0; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); +int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, + struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { return 0; } static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, unsigned int flags, - struct iomap_ops *ops) + struct vm_fault *vmf, struct iomap_ops *ops) { return VM_FAULT_FALLBACK; } diff --git a/include/linux/mm.h b/include/linux/mm.h index d22f7837ad90..0961e95e904e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -351,8 +351,7 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); - int (*pmd_fault)(struct vm_area_struct *, unsigned long address, - pmd_t *, unsigned int flags); + int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -- cgit v1.2.3 From f42003917b4569a2f4f0c79c35e1e3df2859f81a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 22 Feb 2017 15:40:06 -0800 Subject: mm, dax: change pmd_fault() to take only vmf parameter pmd_fault() and related functions really only need the vmf parameter since the additional parameters are all included in the vmf struct. Remove the additional parameter and simplify pmd_fault() and friends. Link: http://lkml.kernel.org/r/1484085142-2297-8-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Dave Jiang Cc: Matthew Wilcox Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 7 +++---- include/linux/mm.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index a829fee2b42b..c1bd6ab5e974 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -71,15 +71,14 @@ static inline unsigned int dax_radix_order(void *entry) return PMD_SHIFT - PAGE_SHIFT; return 0; } -int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - struct iomap_ops *ops); +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { return 0; } -static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, - struct vm_fault *vmf, struct iomap_ops *ops) +static inline int dax_iomap_pmd_fault(struct vm_fault *vmf, + struct iomap_ops *ops) { return VM_FAULT_FALLBACK; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 0961e95e904e..3787f047a098 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -351,7 +351,7 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); - int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pmd_fault)(struct vm_fault *vmf); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -- cgit v1.2.3 From bf5eb3de3847ebcfd1fea7bc14072ef9f21d4e8d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:11 -0800 Subject: slub: separate out sysfs_slab_release() from sysfs_slab_remove() Separate out slub sysfs removal and release, and call the former earlier from __kmem_cache_shutdown(). There's no reason to defer sysfs removal through RCU and this will later allow us to remove sysfs files way earlier during memory cgroup offline instead of release. Link: http://lkml.kernel.org/r/20170117235411.9408-3-tj@kernel.org Signed-off-by: Tejun Heo Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 75f56c2ef2d4..07ef550c6627 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -113,9 +113,9 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS -void sysfs_slab_remove(struct kmem_cache *); +void sysfs_slab_release(struct kmem_cache *); #else -static inline void sysfs_slab_remove(struct kmem_cache *s) +static inline void sysfs_slab_release(struct kmem_cache *s) { } #endif -- cgit v1.2.3 From 9eeadc8b6e0e31f9aea1f8886ef472f62c2b7f55 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:17 -0800 Subject: slab: reorganize memcg_cache_params We're going to change how memcg caches are iterated. In preparation, clean up and reorganize memcg_cache_params. * The shared ->list is replaced by ->children in root and ->children_node in children. * ->is_root_cache is removed. Instead ->root_cache is moved out of the child union and now used by both root and children. NULL indicates root cache. Non-NULL a memcg one. This patch doesn't cause any observable behavior changes. Link: http://lkml.kernel.org/r/20170117235411.9408-5-tj@kernel.org Signed-off-by: Tejun Heo Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 4c5363566815..1f611ba00f1d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -545,22 +545,37 @@ struct memcg_cache_array { * array to be accessed without taking any locks, on relocation we free the old * version only after a grace period. * - * Child caches will hold extra metadata needed for its operation. Fields are: + * Root and child caches hold different metadata. * - * @memcg: pointer to the memcg this cache belongs to - * @root_cache: pointer to the global, root cache, this cache was derived from + * @root_cache: Common to root and child caches. NULL for root, pointer to + * the root cache for children. * - * Both root and child caches of the same kind are linked into a list chained - * through @list. + * The following fields are specific to root caches. + * + * @memcg_caches: kmemcg ID indexed table of child caches. This table is + * used to index child cachces during allocation and cleared + * early during shutdown. + * + * @children: List of all child caches. While the child caches are also + * reachable through @memcg_caches, a child cache remains on + * this list until it is actually destroyed. + * + * The following fields are specific to child caches. + * + * @memcg: Pointer to the memcg this cache belongs to. + * + * @children_node: List node for @root_cache->children list. */ struct memcg_cache_params { - bool is_root_cache; - struct list_head list; + struct kmem_cache *root_cache; union { - struct memcg_cache_array __rcu *memcg_caches; + struct { + struct memcg_cache_array __rcu *memcg_caches; + struct list_head children; + }; struct { struct mem_cgroup *memcg; - struct kmem_cache *root_cache; + struct list_head children_node; }; }; }; -- cgit v1.2.3 From bc2791f857e1984b7548d2a2de2ffb1a913dee62 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:21 -0800 Subject: slab: link memcg kmem_caches on their associated memory cgroup With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. While a memcg kmem_cache is listed on its root cache's ->children list, there is no direct way to iterate all kmem_caches which are assocaited with a memory cgroup. The only way to iterate them is walking all caches while filtering out caches which don't match, which would be most of them. This makes memcg destruction operations O(N^2) where N is the total number of slab caches which can be huge. This combined with the synchronous RCU operations can tie up a CPU and affect the whole machine for many hours when memory reclaim triggers offlining and destruction of the stale memcgs. This patch adds mem_cgroup->kmem_caches list which goes through memcg_cache_params->kmem_caches_node of all kmem_caches which are associated with the memcg. All memcg specific iterations, including stat file access, are updated to use the new list instead. Link: http://lkml.kernel.org/r/20170117235411.9408-6-tj@kernel.org Signed-off-by: Tejun Heo Reported-by: Jay Vana Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + include/linux/slab.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 254698856b8f..9fcece9be85d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -253,6 +253,7 @@ struct mem_cgroup { /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; enum memcg_kmem_state kmem_state; + struct list_head kmem_caches; #endif int last_scanned_node; diff --git a/include/linux/slab.h b/include/linux/slab.h index 1f611ba00f1d..a0cc7a77cda2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -565,6 +565,8 @@ struct memcg_cache_array { * @memcg: Pointer to the memcg this cache belongs to. * * @children_node: List node for @root_cache->children list. + * + * @kmem_caches_node: List node for @memcg->kmem_caches list. */ struct memcg_cache_params { struct kmem_cache *root_cache; @@ -576,6 +578,7 @@ struct memcg_cache_params { struct { struct mem_cgroup *memcg; struct list_head children_node; + struct list_head kmem_caches_node; }; }; }; -- cgit v1.2.3 From 510ded33e075c2bd662b1efab0110f4240325fc9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:24 -0800 Subject: slab: implement slab_root_caches list With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. slab_caches currently lists all caches including root and memcg ones. This is the only data structure which lists the root caches and iterating root caches can only be done by walking the list while skipping over memcg caches. As there can be a huge number of memcg caches, this can become very expensive. This also can make /proc/slabinfo behave very badly. seq_file processes reads in 4k chunks and seeks to the previous Nth position on slab_caches list to resume after each chunk. With a lot of memcg cache churns on the list, reading /proc/slabinfo can become very slow and its content often ends up with duplicate and/or missing entries. This patch adds a new list slab_root_caches which lists only the root caches. When memcg is not enabled, it becomes just an alias of slab_caches. memcg specific list operations are collected into memcg_[un]link_cache(). Link: http://lkml.kernel.org/r/20170117235411.9408-7-tj@kernel.org Signed-off-by: Tejun Heo Reported-by: Jay Vana Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index a0cc7a77cda2..af1a5bef80f4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -556,6 +556,8 @@ struct memcg_cache_array { * used to index child cachces during allocation and cleared * early during shutdown. * + * @root_caches_node: List node for slab_root_caches list. + * * @children: List of all child caches. While the child caches are also * reachable through @memcg_caches, a child cache remains on * this list until it is actually destroyed. @@ -573,6 +575,7 @@ struct memcg_cache_params { union { struct { struct memcg_cache_array __rcu *memcg_caches; + struct list_head __root_caches_node; struct list_head children; }; struct { -- cgit v1.2.3 From 01fb58bcba63f8fba37581c24c99e9a515dd0335 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:30 -0800 Subject: slab: remove synchronous synchronize_sched() from memcg cache deactivation path With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. slub uses synchronize_sched() to deactivate a memcg cache. synchronize_sched() is an expensive and slow operation and doesn't scale when a huge number of caches are destroyed back-to-back. While there used to be a simple batching mechanism, the batching was too restricted to be helpful. This patch implements slab_deactivate_memcg_cache_rcu_sched() which slub can use to schedule sched RCU callback instead of performing synchronize_sched() synchronously while holding cgroup_mutex. While this adds online cpus, mems and slab_mutex operations, operating on these locks back-to-back from the same kworker, which is what's gonna happen when there are many to deactivate, isn't expensive at all and this gets rid of the scalability problem completely. Link: http://lkml.kernel.org/r/20170117235411.9408-9-tj@kernel.org Signed-off-by: Tejun Heo Reported-by: Jay Vana Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index af1a5bef80f4..3c37a8c51921 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -582,6 +582,12 @@ struct memcg_cache_params { struct mem_cgroup *memcg; struct list_head children_node; struct list_head kmem_caches_node; + + void (*deact_fn)(struct kmem_cache *); + union { + struct rcu_head deact_rcu_head; + struct work_struct deact_work; + }; }; }; }; -- cgit v1.2.3 From 17cc4dfeda97636d67e83de8cd41940b65a93bc7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 22 Feb 2017 15:41:36 -0800 Subject: slab: use memcg_kmem_cache_wq for slab destruction operations If there's contention on slab_mutex, queueing the per-cache destruction work item on the system_wq can unnecessarily create and tie up a lot of kworkers. Rename memcg_kmem_cache_create_wq to memcg_kmem_cache_wq and make it global and use that workqueue for the destruction work items too. While at it, convert the workqueue from an unbound workqueue to a per-cpu one with concurrency limited to 1. It's generally preferable to use per-cpu workqueues and concurrency limit of 1 is safe enough. This is suggested by Joonsoo Kim. Link: http://lkml.kernel.org/r/20170117235411.9408-11-tj@kernel.org Signed-off-by: Tejun Heo Reported-by: Jay Vana Acked-by: Vladimir Davydov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9fcece9be85d..5af377303880 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -830,6 +830,7 @@ void memcg_kmem_uncharge(struct page *page, int order); #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) extern struct static_key_false memcg_kmem_enabled_key; +extern struct workqueue_struct *memcg_kmem_cache_wq; extern int memcg_nr_cache_ids; void memcg_get_cache_ids(void); -- cgit v1.2.3 From 893e26e61d04eac974ded0c11e1647b335c8cb7b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 22 Feb 2017 15:42:27 -0800 Subject: userfaultfd: non-cooperative: Add fork() event When the mm with uffd-ed vmas fork()-s the respective vmas notify their uffds with the event which contains a descriptor with new uffd. This new descriptor can then be used to get events from the child and populate its mm with data. Note, that there can be different uffd-s controlling different vmas within one mm, so first we should collect all those uffds (and ctx-s) in a list and then notify them all one by one but only once per fork(). The context is created at fork() time but the descriptor, file struct and anon inode object is created at event read time. So some trickery is added to the userfaultfd_ctx_read() to handle the ctx queues' locking vs file creation. Another thing worth noticing is that the task that fork()-s waits for the uffd event to get processed WITHOUT the mmap sem. [aarcange@redhat.com: build warning fix] Link: http://lkml.kernel.org/r/20161216144821.5183-10-aarcange@redhat.com Link: http://lkml.kernel.org/r/20161216144821.5183-9-aarcange@redhat.com Signed-off-by: Pavel Emelyanov Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 11b92b047a1e..79002bca1f43 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -52,6 +52,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); } +extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); +extern void dup_userfaultfd_complete(struct list_head *); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -76,6 +79,16 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return false; } +static inline int dup_userfaultfd(struct vm_area_struct *vma, + struct list_head *l) +{ + return 0; +} + +static inline void dup_userfaultfd_complete(struct list_head *l) +{ +} + #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From 72f87654c69690ff4721bd9b4a39983f971de9a5 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 22 Feb 2017 15:42:34 -0800 Subject: userfaultfd: non-cooperative: add mremap() event The event denotes that an area [start:end] moves to different location. Length change isn't reported as "new" addresses, if they appear on the uffd reader side they will not contain any data and the latter can just zeromap them. Waiting for the event ACK is also done outside of mmap sem, as for fork event. Link: http://lkml.kernel.org/r/20161216144821.5183-12-aarcange@redhat.com Signed-off-by: Pavel Emelyanov Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 79002bca1f43..7f318a46044b 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -55,6 +55,12 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); extern void dup_userfaultfd_complete(struct list_head *); +extern void mremap_userfaultfd_prep(struct vm_area_struct *, + struct vm_userfaultfd_ctx *); +extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx, + unsigned long from, unsigned long to, + unsigned long len); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -89,6 +95,17 @@ static inline void dup_userfaultfd_complete(struct list_head *l) { } +static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx *ctx) +{ +} + +static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx ctx, + unsigned long from, + unsigned long to, + unsigned long len) +{ +} #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From 90794bf19dc19691a16b71bcd75c04094d9e392d Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 22 Feb 2017 15:42:37 -0800 Subject: userfaultfd: non-cooperative: optimize mremap_userfaultfd_complete() Optimize the mremap_userfaultfd_complete() interface to pass only the vm_userfaultfd_ctx pointer through the stack as a microoptimization. Link: http://lkml.kernel.org/r/20161216144821.5183-13-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Hillf Danton Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Michael Rapoport Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 7f318a46044b..78ec197e8b47 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -57,7 +57,7 @@ extern void dup_userfaultfd_complete(struct list_head *); extern void mremap_userfaultfd_prep(struct vm_area_struct *, struct vm_userfaultfd_ctx *); -extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx, +extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); @@ -100,7 +100,7 @@ static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, { } -static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx ctx, +static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, unsigned long from, unsigned long to, unsigned long len) -- cgit v1.2.3 From 05ce77249d5068b057082d24ec22d3824f4816ac Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 22 Feb 2017 15:42:40 -0800 Subject: userfaultfd: non-cooperative: add madvise() event for MADV_DONTNEED request If the page is punched out of the address space the uffd reader should know this and zeromap the respective area in case of the #PF event. Link: http://lkml.kernel.org/r/20161216144821.5183-14-aarcange@redhat.com Signed-off-by: Pavel Emelyanov Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 78ec197e8b47..f431861f22f1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,6 +61,11 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); +extern void madvise_userfault_dontneed(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -106,6 +111,13 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, unsigned long len) { } + +static inline void madvise_userfault_dontneed(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end) +{ +} #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From fa4d75c1de13299c61b5e18a1ae46bc00888b599 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 22 Feb 2017 15:42:49 -0800 Subject: userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb userfaultfd support userfaultfd UFFDIO_COPY allows user level code to copy data to a page at fault time. The data is copied from user space to a newly allocated huge page. The new routine copy_huge_page_from_user performs this copy. Link: http://lkml.kernel.org/r/20161216144821.5183-17-aarcange@redhat.com Signed-off-by: Mike Kravetz Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3787f047a098..6bc7f2c1a6d1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2424,6 +2424,9 @@ extern void clear_huge_page(struct page *page, extern void copy_user_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma, unsigned int pages_per_huge_page); +extern long copy_huge_page_from_user(struct page *dst_page, + const void __user *usr_src, + unsigned int pages_per_huge_page); #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ extern struct page_ext_operations debug_guardpage_ops; -- cgit v1.2.3 From 8fb5debc5fcd450470cdd789c2d80ef95ebb8cf4 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 22 Feb 2017 15:42:52 -0800 Subject: userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support hugetlb_mcopy_atomic_pte is the low level routine that implements the userfaultfd UFFDIO_COPY command. It is based on the existing mcopy_atomic_pte routine with modifications for huge pages. Link: http://lkml.kernel.org/r/20161216144821.5183-18-aarcange@redhat.com Signed-off-by: Mike Kravetz Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Rapoport Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 48c76d612d40..aab2fff3e269 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -81,6 +81,11 @@ void hugetlb_show_meminfo(void); unsigned long hugetlb_total_pages(void); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags); +int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); int hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); @@ -149,6 +154,8 @@ static inline void hugetlb_show_meminfo(void) #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) +#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ + src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address) 0 static inline int dequeue_hwpoisoned_huge_page(struct page *page) { -- cgit v1.2.3 From 810a56b943e265bbabfcd5a8e54cb8d3b16cd6e4 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 22 Feb 2017 15:42:58 -0800 Subject: userfaultfd: hugetlbfs: fix __mcopy_atomic_hugetlb retry/error processing The new routine copy_huge_page_from_user() uses kmap_atomic() to map PAGE_SIZE pages. However, this prevents page faults in the subsequent call to copy_from_user(). This is OK in the case where the routine is copied with mmap_sema held. However, in another case we want to allow page faults. So, add a new argument allow_pagefault to indicate if the routine should allow page faults. [dan.carpenter@oracle.com: unmap the correct pointer] Link: http://lkml.kernel.org/r/20170113082608.GA3548@mwanda [akpm@linux-foundation.org: kunmap() takes a page*, per Hugh] Link: http://lkml.kernel.org/r/20161216144821.5183-20-aarcange@redhat.com Signed-off-by: Mike Kravetz Signed-off-by: Andrea Arcangeli Signed-off-by: Dan Carpenter Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Rapoport Cc: Pavel Emelyanov Cc: Hugh Dickins Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6bc7f2c1a6d1..c3e2be2b3296 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2426,7 +2426,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src, unsigned int pages_per_huge_page); extern long copy_huge_page_from_user(struct page *dst_page, const void __user *usr_src, - unsigned int pages_per_huge_page); + unsigned int pages_per_huge_page, + bool allow_pagefault); #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ extern struct page_ext_operations debug_guardpage_ops; -- cgit v1.2.3 From 87ffc118b54dcd4cc642723603d944673248152f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 22 Feb 2017 15:43:13 -0800 Subject: userfaultfd: hugetlbfs: gup: support VM_FAULT_RETRY Add support for VM_FAULT_RETRY to follow_hugetlb_page() so that get_user_pages_unlocked/locked and "nonblocking/FOLL_NOWAIT" features will work on hugetlbfs. This is required for fully functional userfaultfd non-present support on hugetlbfs. Link: http://lkml.kernel.org/r/20161216144821.5183-25-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reviewed-by: Mike Kravetz Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Rapoport Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index aab2fff3e269..503099d8aada 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -65,7 +65,8 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, - unsigned long *, unsigned long *, long, unsigned int); + unsigned long *, unsigned long *, long, unsigned int, + int *); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range_final(struct mmu_gather *tlb, @@ -136,7 +137,7 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } -#define follow_hugetlb_page(m,v,p,vs,a,b,i,w) ({ BUG(); 0; }) +#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) static inline void hugetlb_report_meminfo(struct seq_file *m) -- cgit v1.2.3 From 4c27fe4c4c84f3afd504ecff2420cc1ad420d38e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 22 Feb 2017 15:43:25 -0800 Subject: userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support shmem_mcopy_atomic_pte is the low level routine that implements the userfaultfd UFFDIO_COPY command. It is based on the existing mcopy_atomic_pte routine with modifications for shared memory pages. Link: http://lkml.kernel.org/r/20161216144821.5183-29-aarcange@redhat.com Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shmem_fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff078e7043b6..fdaac9d4d46d 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -124,4 +124,15 @@ static inline bool shmem_huge_enabled(struct vm_area_struct *vma) } #endif +#ifdef CONFIG_SHMEM +extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep); +#else +#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ + src_addr, pagep) ({ BUG(); 0; }) +#endif + #endif -- cgit v1.2.3 From b0506e488da5cf2f07f3a4f6d7acaa8f459ad714 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 22 Feb 2017 15:43:28 -0800 Subject: userfaultfd: shmem: introduce vma_is_shmem Currently userfault relies on vma_is_anonymous and vma_is_hugetlb to ensure compatibility of a VMA with userfault. Introduction of vma_is_shmem allows detection if tmpfs backed VMAs, so that they may be used with userfaultfd. Current implementation presumes usage of vma_is_shmem only by slow path routines in userfaultfd, therefore the vma_is_shmem is not made inline to leave the few remaining free bits in vm_flags. Link: http://lkml.kernel.org/r/20161216144821.5183-30-aarcange@redhat.com Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Hillf Danton Cc: Michael Rapoport Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c3e2be2b3296..bb997493e15d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1383,6 +1383,16 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +#ifdef CONFIG_SHMEM +/* + * The vma_is_shmem is not inline because it is used only by slow + * paths in userfault. + */ +bool vma_is_shmem(struct vm_area_struct *vma); +#else +static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } +#endif + static inline int stack_guard_page_start(struct vm_area_struct *vma, unsigned long addr) { -- cgit v1.2.3 From 74d81bfae8e3f52e956367f6ed764db269b87091 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 22 Feb 2017 15:44:41 -0800 Subject: mm: un-export wake_up_page functions These are no longer used outside mm/filemap.c, so un-export them and make them static where possible. These were exported specifically for NFS use in commit a4796e37c12e ("MM: export page_wakeup functions"). Link: http://lkml.kernel.org/r/20170103182234.30141-3-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Trond Myklebust Cc: Anna Schumaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 324c8dbad1e1..b572f5530392 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -482,19 +482,11 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, } /* - * This is exported only for wait_on_page_locked/wait_on_page_writeback, - * and for filesystems which need to wait on PG_private. + * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc., + * and should not be used directly. */ extern void wait_on_page_bit(struct page *page, int bit_nr); extern int wait_on_page_bit_killable(struct page *page, int bit_nr); -extern void wake_up_page_bit(struct page *page, int bit_nr); - -static inline void wake_up_page(struct page *page, int bit) -{ - if (!PageWaiters(page)) - return; - wake_up_page_bit(page, bit); -} /* * Wait for a page to be unlocked. -- cgit v1.2.3 From 7f354a548d1cb6bb01b6ee74aee9264aa152f1ec Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Feb 2017 15:44:50 -0800 Subject: mm, compaction: add vmstats for kcompactd work A "compact_daemon_wake" vmstat exists that represents the number of times kcompactd has woken up. This doesn't represent how much work it actually did, though. It's useful to understand how much compaction work is being done by kcompactd versus other methods such as direct compaction and explicitly triggered per-node (or system) compaction. This adds two new vmstats: "compact_daemon_migrate_scanned" and "compact_daemon_free_scanned" to represent the number of pages kcompactd has scanned as part of its migration scanner and freeing scanner, respectively. These values are still accounted for in the general "compact_migrate_scanned" and "compact_free_scanned" for compatibility. It could be argued that explicitly triggered compaction could also be tracked separately, and that could be added if others find it useful. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1612071749390.69852@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 4d6ec58a8d45..6aa1b6cb5828 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -56,6 +56,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, KCOMPACTD_WAKE, + KCOMPACTD_MIGRATE_SCANNED, KCOMPACTD_FREE_SCANNED, #endif #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, -- cgit v1.2.3 From b92df1de5d289c0b5d653e72414bf0850b8511e0 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 22 Feb 2017 15:44:53 -0800 Subject: mm: page_alloc: skip over regions of invalid pfns where possible When using a sparse memory model memmap_init_zone() when invoked with the MEMMAP_EARLY context will skip over pages which aren't valid - ie. which aren't in a populated region of the sparse memory map. However if the memory map is extremely sparse then it can spend a long time linearly checking each PFN in a large non-populated region of the memory map & skipping it in turn. When CONFIG_HAVE_MEMBLOCK_NODE_MAP is enabled, we have sufficient information to quickly discover the next valid PFN given an invalid one by searching through the list of memory regions & skipping forwards to the first PFN covered by the memory region to the right of the non-populated region. Implement this in order to speed up memmap_init_zone() for systems with extremely sparse memory maps. James said "I have tested this patch on a virtual model of a Samurai CPU with a sparse memory map. The kernel boot time drops from 109 to 62 seconds. " Link: http://lkml.kernel.org/r/20161125185518.29885-1-paul.burton@imgtec.com Signed-off-by: Paul Burton Tested-by: James Hartley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 5b759c9acf97..38bcf00cbed3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -203,6 +203,7 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); +unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); /** * for_each_mem_pfn_range - early memory pfn range iterator -- cgit v1.2.3 From 16e72e9b30986ee15f17fbb68189ca842c32af58 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 22 Feb 2017 15:45:16 -0800 Subject: powerpc: do not make the entire heap executable On 32-bit powerpc the ELF PLT sections of binaries (built with --bss-plt, or with a toolchain which defaults to it) look like this: [17] .sbss NOBITS 0002aff8 01aff8 000014 00 WA 0 0 4 [18] .plt NOBITS 0002b00c 01aff8 000084 00 WAX 0 0 4 [19] .bss NOBITS 0002b090 01aff8 0000a4 00 WA 0 0 4 Which results in an ELF load header: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align LOAD 0x019c70 0x00029c70 0x00029c70 0x01388 0x014c4 RWE 0x10000 This is all correct, the load region containing the PLT is marked as executable. Note that the PLT starts at 0002b00c but the file mapping ends at 0002aff8, so the PLT falls in the 0 fill section described by the load header, and after a page boundary. Unfortunately the generic ELF loader ignores the X bit in the load headers when it creates the 0 filled non-file backed mappings. It assumes all of these mappings are RW BSS sections, which is not the case for PPC. gcc/ld has an option (--secure-plt) to not do this, this is said to incur a small performance penalty. Currently, to support 32-bit binaries with PLT in BSS kernel maps *entire brk area* with executable rights for all binaries, even --secure-plt ones. Stop doing that. Teach the ELF loader to check the X bit in the relevant load header and create 0 filled anonymous mappings that are executable if the load header requests that. Test program showing the difference in /proc/$PID/maps: int main() { char buf[16*1024]; char *p = malloc(123); /* make "[heap]" mapping appear */ int fd = open("/proc/self/maps", O_RDONLY); int len = read(fd, buf, sizeof(buf)); write(1, buf, len); printf("%p\n", p); return 0; } Compiled using: gcc -mbss-plt -m32 -Os test.c -otest Unpatched ppc64 kernel: 00100000-00120000 r-xp 00000000 00:00 0 [vdso] 0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094 /usr/lib/libc-2.17.so 0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094 /usr/lib/libc-2.17.so 0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094 /usr/lib/libc-2.17.so 10000000-10010000 r-xp 00000000 fd:00 100674505 /home/user/test 10010000-10020000 r--p 00000000 fd:00 100674505 /home/user/test 10020000-10030000 rw-p 00010000 fd:00 100674505 /home/user/test 10690000-106c0000 rwxp 00000000 00:00 0 [heap] f7f70000-f7fa0000 r-xp 00000000 fd:00 67898089 /usr/lib/ld-2.17.so f7fa0000-f7fb0000 r--p 00020000 fd:00 67898089 /usr/lib/ld-2.17.so f7fb0000-f7fc0000 rw-p 00030000 fd:00 67898089 /usr/lib/ld-2.17.so ffa90000-ffac0000 rw-p 00000000 00:00 0 [stack] 0x10690008 Patched ppc64 kernel: 00100000-00120000 r-xp 00000000 00:00 0 [vdso] 0fe10000-0ffd0000 r-xp 00000000 fd:00 67898094 /usr/lib/libc-2.17.so 0ffd0000-0ffe0000 r--p 001b0000 fd:00 67898094 /usr/lib/libc-2.17.so 0ffe0000-0fff0000 rw-p 001c0000 fd:00 67898094 /usr/lib/libc-2.17.so 10000000-10010000 r-xp 00000000 fd:00 100674505 /home/user/test 10010000-10020000 r--p 00000000 fd:00 100674505 /home/user/test 10020000-10030000 rw-p 00010000 fd:00 100674505 /home/user/test 10180000-101b0000 rw-p 00000000 00:00 0 [heap] ^^^^ this has changed f7c60000-f7c90000 r-xp 00000000 fd:00 67898089 /usr/lib/ld-2.17.so f7c90000-f7ca0000 r--p 00020000 fd:00 67898089 /usr/lib/ld-2.17.so f7ca0000-f7cb0000 rw-p 00030000 fd:00 67898089 /usr/lib/ld-2.17.so ff860000-ff890000 rw-p 00000000 00:00 0 [stack] 0x10180008 The patch was originally posted in 2012 by Jason Gunthorpe and apparently ignored: https://lkml.org/lkml/2012/9/30/138 Lightly run-tested. Link: http://lkml.kernel.org/r/20161215131950.23054-1-dvlasenk@redhat.com Signed-off-by: Jason Gunthorpe Signed-off-by: Denys Vlasenko Acked-by: Kees Cook Acked-by: Michael Ellerman Tested-by: Jason Gunthorpe Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Aneesh Kumar K.V" Cc: Oleg Nesterov Cc: Florian Weimer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bb997493e15d..dae6f58d67c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2083,6 +2083,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} /* These take the mm semaphore themselves */ extern int __must_check vm_brk(unsigned long, unsigned long); +extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long); extern int vm_munmap(unsigned long, size_t); extern unsigned long __must_check vm_mmap(struct file *, unsigned long, unsigned long, unsigned long, -- cgit v1.2.3 From 235b62176712b970c815923e36b9a9cc05d4d901 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Wed, 22 Feb 2017 15:45:22 -0800 Subject: mm/swap: add cluster lock This patch is to reduce the lock contention of swap_info_struct->lock via using a more fine grained lock in swap_cluster_info for some swap operations. swap_info_struct->lock is heavily contended if multiple processes reclaim pages simultaneously. Because there is only one lock for each swap device. While in common configuration, there is only one or several swap devices in the system. The lock protects almost all swap related operations. In fact, many swap operations only access one element of swap_info_struct->swap_map array. And there is no dependency between different elements of swap_info_struct->swap_map. So a fine grained lock can be used to allow parallel access to the different elements of swap_info_struct->swap_map. In this patch, a spinlock is added to swap_cluster_info to protect the elements of swap_info_struct->swap_map in the swap cluster and the fields of swap_cluster_info. This reduced locking contention for swap_info_struct->swap_map access greatly. Because of the added spinlock, the size of swap_cluster_info increases from 4 bytes to 8 bytes on the 64 bit and 32 bit system. This will use additional 4k RAM for every 1G swap space. Because the size of swap_cluster_info is much smaller than the size of the cache line (8 vs 64 on x86_64 architecture), there may be false cache line sharing between spinlocks in swap_cluster_info. To avoid the false sharing in the first round of the swap cluster allocation, the order of the swap clusters in the free clusters list is changed. So that, the swap_cluster_info sharing the same cache line will be placed as far as possible. After the first round of allocation, the order of the clusters in free clusters list is expected to be random. So the false sharing should be not serious. Compared with a previous implementation using bit_spin_lock, the sequential swap out throughput improved about 3.2%. Test was done on a Xeon E5 v3 system. The swap device used is a RAM simulated PMEM (persistent memory) device. To test the sequential swapping out, the test case created 32 processes, which sequentially allocate and write to the anonymous pages until the RAM and part of the swap device is used. [ying.huang@intel.com: v5] Link: http://lkml.kernel.org/r/878tqeuuic.fsf_-_@yhuang-dev.intel.com [minchan@kernel.org: initialize spinlock for swap_cluster_info] Link: http://lkml.kernel.org/r/1486434945-29753-1-git-send-email-minchan@kernel.org [hughd@google.com: annotate nested locking for cluster lock] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1702161050540.21773@eggly.anvils Link: http://lkml.kernel.org/r/dbb860bbd825b1aaba18988015e8963f263c3f0d.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: "Huang, Ying" Signed-off-by: Tim Chen Signed-off-by: Minchan Kim Signed-off-by: Hugh Dickins Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 7f47b7098b1b..279c7b84bd63 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -176,6 +176,12 @@ enum { * protected by swap_info_struct.lock. */ struct swap_cluster_info { + spinlock_t lock; /* + * Protect swap_cluster_info fields + * and swap_info_struct->swap_map + * elements correspond to the swap + * cluster + */ unsigned int data:24; unsigned int flags:8; }; -- cgit v1.2.3 From 4b3ef9daa4fc0bba742a79faecb17fdaaead083b Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Wed, 22 Feb 2017 15:45:26 -0800 Subject: mm/swap: split swap cache into 64MB trunks The patch is to improve the scalability of the swap out/in via using fine grained locks for the swap cache. In current kernel, one address space will be used for each swap device. And in the common configuration, the number of the swap device is very small (one is typical). This causes the heavy lock contention on the radix tree of the address space if multiple tasks swap out/in concurrently. But in fact, there is no dependency between pages in the swap cache. So that, we can split the one shared address space for each swap device into several address spaces to reduce the lock contention. In the patch, the shared address space is split into 64MB trunks. 64MB is chosen to balance the memory space usage and effect of lock contention reduction. The size of struct address_space on x86_64 architecture is 408B, so with the patch, 6528B more memory will be used for every 1GB swap space on x86_64 architecture. One address space is still shared for the swap entries in the same 64M trunks. To avoid lock contention for the first round of swap space allocation, the order of the swap clusters in the initial free clusters list is changed. The swap space distance between the consecutive swap clusters in the free cluster list is at least 64M. After the first round of allocation, the swap clusters are expected to be freed randomly, so the lock contention should be reduced effectively. Link: http://lkml.kernel.org/r/735bab895e64c930581ffb0a05b661e01da82bc5.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: "Huang, Ying" Signed-off-by: Tim Chen Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 279c7b84bd63..648a32b58e3e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -343,8 +343,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *, sector_t *); /* linux/mm/swap_state.c */ -extern struct address_space swapper_spaces[]; -#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) +/* One swap address space for each 64M swap space */ +#define SWAP_ADDRESS_SPACE_SHIFT 14 +#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT) +extern struct address_space *swapper_spaces[]; +#define swap_address_space(entry) \ + (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ + >> SWAP_ADDRESS_SPACE_SHIFT]) extern unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *, struct list_head *list); @@ -398,6 +403,8 @@ extern struct swap_info_struct *page_swap_info(struct page *); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; +extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); +extern void exit_swap_address_space(unsigned int type); #else /* CONFIG_SWAP */ -- cgit v1.2.3 From e8c26ab60598558ec3a626e7925b06e7417d7710 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 22 Feb 2017 15:45:29 -0800 Subject: mm/swap: skip readahead for unreferenced swap slots We can avoid needlessly allocating page for swap slots that are not used by anyone. No pages have to be read in for these slots. Link: http://lkml.kernel.org/r/0784b3f20b9bd3aa5552219624cb78dc4ae710c9.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: Tim Chen Signed-off-by: "Huang, Ying" Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 648a32b58e3e..3116382067cd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -398,6 +398,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); extern bool reuse_swap_page(struct page *, int *); @@ -492,6 +493,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int __swp_swapcount(swp_entry_t entry) +{ + return 0; +} + static inline int swp_swapcount(swp_entry_t entry) { return 0; -- cgit v1.2.3 From 36005bae205da3eef0016a5c96a34f10a68afa1e Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 22 Feb 2017 15:45:33 -0800 Subject: mm/swap: allocate swap slots in batches Currently, the swap slots are allocated one page at a time, causing contention to the swap_info lock protecting the swap partition on every page being swapped. This patch adds new functions get_swap_pages and scan_swap_map_slots to request multiple swap slots at once. This will reduces the lock contention on the swap_info lock. Also scan_swap_map_slots can operate more efficiently as swap slots often occurs in clusters close to each other on a swap device and it is quicker to allocate them together. Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: Tim Chen Signed-off-by: "Huang, Ying" Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3116382067cd..956eae8a8edf 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -27,6 +27,7 @@ struct bio; #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD_PAGES) +#define SWAP_BATCH 64 static inline int current_is_kswapd(void) { @@ -386,6 +387,7 @@ static inline long get_nr_swap_pages(void) extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page_of_type(int); +extern int get_swap_pages(int n, swp_entry_t swp_entries[]); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); -- cgit v1.2.3 From 7c00bafee87c7bac7ed9eced7c161f8e5332cb4e Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 22 Feb 2017 15:45:36 -0800 Subject: mm/swap: free swap slots in batch Add new functions that free unused swap slots in batches without the need to reacquire swap info lock. This improves scalability and reduce lock contention. Link: http://lkml.kernel.org/r/c25e0fcdfd237ec4ca7db91631d3b9f6ed23824e.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: Tim Chen Signed-off-by: "Huang, Ying" Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 956eae8a8edf..bcc0b18f96d2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -394,6 +394,7 @@ extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free(swp_entry_t); +extern void swapcache_free_entries(swp_entry_t *entries, int n); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); extern unsigned int count_swap_pages(int, int); -- cgit v1.2.3 From 67afa38e012e9581b9b42f2a41dfc56b1280794d Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 22 Feb 2017 15:45:39 -0800 Subject: mm/swap: add cache for swap slots allocation We add per cpu caches for swap slots that can be allocated and freed quickly without the need to touch the swap info lock. Two separate caches are maintained for swap slots allocated and swap slots returned. This is to allow the swap slots to be returned to the global pool in a batch so they will have a chance to be coaelesced with other slots in a cluster. We do not reuse the slots that are returned right away, as it may increase fragmentation of the slots. The swap allocation cache is protected by a mutex as we may sleep when searching for empty slots in cache. The swap free cache is protected by a spin lock as we cannot sleep in the free path. We refill the swap slots cache when we run out of slots, and we disable the swap slots cache and drain the slots if the global number of slots fall below a low watermark threshold. We re-enable the cache agian when the slots available are above a high watermark. [ying.huang@intel.com: use raw_cpu_ptr over this_cpu_ptr for swap slots access] [tim.c.chen@linux.intel.com: add comments on locks in swap_slots.h] Link: http://lkml.kernel.org/r/20170118180327.GA24225@linux.intel.com Link: http://lkml.kernel.org/r/35de301a4eaa8daa2977de6e987f2c154385eb66.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: Tim Chen Signed-off-by: "Huang, Ying" Reviewed-by: Michal Hocko Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++++ include/linux/swap_slots.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 include/linux/swap_slots.h (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index bcc0b18f96d2..45e91dd6716d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -372,6 +372,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t, /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; +extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) @@ -410,6 +411,9 @@ struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); +extern int get_swap_slots(int n, swp_entry_t *slots); +extern void swapcache_free_batch(swp_entry_t *entries, int n); + #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h new file mode 100644 index 000000000000..ba5623b27c60 --- /dev/null +++ b/include/linux/swap_slots.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_SWAP_SLOTS_H +#define _LINUX_SWAP_SLOTS_H + +#include +#include +#include + +#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH +#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) +#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) + +struct swap_slots_cache { + bool lock_initialized; + struct mutex alloc_lock; /* protects slots, nr, cur */ + swp_entry_t *slots; + int nr; + int cur; + spinlock_t free_lock; /* protects slots_ret, n_ret */ + swp_entry_t *slots_ret; + int n_ret; +}; + +void disable_swap_slots_cache_lock(void); +void reenable_swap_slots_cache_unlock(void); +int enable_swap_slots_cache(void); +int free_swap_slot(swp_entry_t entry); + +#endif /* _LINUX_SWAP_SLOTS_H */ -- cgit v1.2.3 From ba81f83842549871cbd7226fc11530dc464500bb Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 22 Feb 2017 15:45:46 -0800 Subject: mm/swap: skip readahead only when swap slot cache is enabled Because during swap off, a swap entry may have swap_map[] == SWAP_HAS_CACHE (for example, just allocated). If we return NULL in __read_swap_cache_async(), the swap off will abort. So when swap slot cache is disabled, (for swap off), we will wait for page to be put into swap cache in such race condition. This should not be a problem for swap slot cache, because swap slot cache should be drained after clearing swap_slot_cache_enabled. [ying.huang@intel.com: fix memory leak in __read_swap_cache_async()] Link: http://lkml.kernel.org/r/874lzt6znd.fsf@yhuang-dev.intel.com Link: http://lkml.kernel.org/r/5e2c5f6abe8e6eb0797408897b1bba80938e9b9d.1484082593.git.tim.c.chen@linux.intel.com Signed-off-by: "Huang, Ying" Signed-off-by: Tim Chen Cc: Aaron Lu Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Christian Borntraeger Cc: Dave Hansen Cc: Hillf Danton Cc: Huang Ying Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet escreveu: Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Minchan Kim Cc: Rik van Riel Cc: Shaohua Li Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap_slots.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h index ba5623b27c60..6ef92d17633d 100644 --- a/include/linux/swap_slots.h +++ b/include/linux/swap_slots.h @@ -25,4 +25,6 @@ void reenable_swap_slots_cache_unlock(void); int enable_swap_slots_cache(void); int free_swap_slot(swp_entry_t entry); +extern bool swap_slot_cache_enabled; + #endif /* _LINUX_SWAP_SLOTS_H */ -- cgit v1.2.3 From 21440d7eb9044001b7fdb71d0163689f60a0f2a1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 22 Feb 2017 15:45:49 -0800 Subject: mm, thp: add new defer+madvise defrag option There is no thp defrag option that currently allows MADV_HUGEPAGE regions to do direct compaction and reclaim while all other thp allocations simply trigger kswapd and kcompactd in the background and fail immediately. The "defer" setting simply triggers background reclaim and compaction for all regions, regardless of MADV_HUGEPAGE, which makes it unusable for our userspace where MADV_HUGEPAGE is being used to indicate the application is willing to wait for work for thp memory to be available. The "madvise" setting will do direct compaction and reclaim for these MADV_HUGEPAGE regions, but does not trigger kswapd and kcompactd in the background for anybody else. For reasonable usage, there needs to be a mesh between the two options. This patch introduces a fifth mode, "defer+madvise", that will do direct reclaim and compaction for MADV_HUGEPAGE regions and trigger background reclaim and compaction for everybody else so that hugepages may be available in the near future. A proposal to allow direct reclaim and compaction for MADV_HUGEPAGE regions as part of the "defer" mode, making it a very powerful setting and avoids breaking userspace, was offered: http://marc.info/?t=148236612700003 This additional mode is a compromise. A second proposal to allow both "defer" and "madvise" to be selected at the same time was also offered: http://marc.info/?t=148357345300001. This is possible, but there was a concern that it might break existing userspaces the parse the output of the defrag mode, so the fifth option was introduced instead. This patch also cleans up the helper function for storing to "enabled" and "defrag" since the former supports three modes while the latter supports five and triple_flag_store() was getting unnecessarily messy. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701101614330.41805@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Mel Gorman Cc: Vlastimil Babka Cc: Michal Hocko Cc: Jonathan Corbet Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 97e478d6b690..f0029e786205 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -33,6 +33,7 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, -- cgit v1.2.3 From fd538803731e50367b7c59ce4ad3454426a3d671 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:45:58 -0800 Subject: mm, vmscan: cleanup lru size claculations lruvec_lru_size returns the full size of the LRU list while we sometimes need a value reduced only to eligible zones (e.g. for lowmem requests). inactive_list_is_low is one such user. Later patches will add more of them. Add a new parameter to lruvec_lru_size and allow it filter out zones which are not eligible for the given context. Link: http://lkml.kernel.org/r/20170117103702.28542-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Hillf Danton Acked-by: Minchan Kim Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f4aac87adcc3..82fc632fd11d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -779,7 +779,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } -extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru); +extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); #ifdef CONFIG_HAVE_MEMORY_PRESENT void memory_present(int nid, unsigned long start, unsigned long end); -- cgit v1.2.3 From a8e99259e7e32b67af2b447f0a570813c0c283ec Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:46:10 -0800 Subject: mm, page_alloc: warn_alloc print nodemask warn_alloc is currently used for to report an allocation failure or an allocation stall. We print some details of the allocation request like the gfp mask and the request order. We do not print the allocation nodemask which is important when debugging the reason for the allocation failure as well. We alreaddy print the nodemask in the OOM report. Add nodemask to warn_alloc and print it in warn_alloc as well. Link: http://lkml.kernel.org/r/20170117091543.25850-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Johannes Weiner Cc: Vlastimil Babka Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dae6f58d67c8..28b6c3f8a7f3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1942,8 +1942,8 @@ extern void si_meminfo_node(struct sysinfo *val, int nid); extern unsigned long arch_reserved_kernel_pages(void); #endif -extern __printf(2, 3) -void warn_alloc(gfp_t gfp_mask, const char *fmt, ...); +extern __printf(3, 4) +void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); extern void setup_per_cpu_pageset(void); -- cgit v1.2.3 From 9af744d743170b5f5ef70031dea8d772d166ab28 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 15:46:16 -0800 Subject: lib/show_mem.c: teach show_mem to work with the given nodemask show_mem() allows to filter out node specific data which is irrelevant to the allocation request via SHOW_MEM_FILTER_NODES. The filtering is done in skip_free_areas_node which skips all nodes which are not in the mems_allowed of the current process. This works most of the time as expected because the nodemask shouldn't be outside of the allocating task but there are some exceptions. E.g. memory hotplug might want to request allocations from outside of the allowed nodes (see new_node_page). Get rid of this hardcoded behavior and push the allocation mask down the show_mem path and use it instead of cpuset_current_mems_allowed. NULL nodemask is interpreted as cpuset_current_mems_allowed. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170117091543.25850-5-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Mel Gorman Cc: Hillf Danton Cc: Johannes Weiner Cc: Vlastimil Babka Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 28b6c3f8a7f3..8a67cae5a07c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1152,8 +1152,7 @@ extern void pagefault_out_of_memory(void); */ #define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ -extern void show_free_areas(unsigned int flags); -extern bool skip_free_areas_node(unsigned int flags, int nid); +extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); int shmem_zero_setup(struct vm_area_struct *); #ifdef CONFIG_SHMEM @@ -1934,7 +1933,7 @@ extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); -extern void show_mem(unsigned int flags); +extern void show_mem(unsigned int flags, nodemask_t *nodemask); extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -- cgit v1.2.3 From da162e9368990ed747075e2ab427da0759fc4a59 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 22 Feb 2017 15:46:31 -0800 Subject: mm: drop zap_details::ignore_dirty The only user of ignore_dirty is oom-reaper. But it doesn't really use it. ignore_dirty only has effect on file pages mapped with dirty pte. But oom-repear skips shared VMAs, so there's no way we can dirty file pte in them. Link: http://lkml.kernel.org/r/20170118122429.43661-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Peter Zijlstra Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a67cae5a07c..eae478a42f26 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1175,7 +1175,6 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ - bool ignore_dirty; /* Ignore dirty pages */ bool check_swap_entries; /* Check also swap entries */ }; -- cgit v1.2.3 From 3e8715fdc03e8df4d26d8e436166e44e3e416d3b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 22 Feb 2017 15:46:34 -0800 Subject: mm: drop zap_details::check_swap_entries detail == NULL would give the same functionality as .check_swap_entries==true. Link: http://lkml.kernel.org/r/20170118122429.43661-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Peter Zijlstra Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index eae478a42f26..062936e8b832 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1175,7 +1175,6 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ - bool check_swap_entries; /* Check also swap entries */ }; struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, -- cgit v1.2.3 From ecf1385d72f0491400a8ceca7001196ca369aa8c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 22 Feb 2017 15:46:37 -0800 Subject: mm: drop unused argument of zap_page_range() There's no users of zap_page_range() who wants non-NULL 'details'. Let's drop it. Link: http://lkml.kernel.org/r/20170118122429.43661-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Peter Zijlstra Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 062936e8b832..574bc157a27c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1185,7 +1185,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *); + unsigned long size); void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From 083fb8edda0487d192e8c117f625563b920cf7a4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 22 Feb 2017 15:46:48 -0800 Subject: mm: fix stray kernel-doc notation Delete stray (second) function description in find_lock_page() kernel-doc notation. Note: scripts/kernel-doc just ignores the second function description. Fixes: 2457aec63745e ("mm: non-atomically mark page accessed during page cache allocation where possible") Link: http://lkml.kernel.org/r/b037e9a3-516c-ec02-6c8e-fa5479747ba6@infradead.org Signed-off-by: Randy Dunlap Reported-by: Matthew Wilcox Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b572f5530392..84943e8057ef 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -266,7 +266,6 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, /** * find_lock_page - locate, pin and lock a pagecache page - * pagecache_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * -- cgit v1.2.3 From 745d8ae4622c6808b22e33a944c7decb30074be4 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 23 Feb 2017 12:02:42 +0200 Subject: net/mlx4: Spoofcheck and zero MAC can't coexist Spoofcheck can't be enabled if VF MAC is zero. Vice versa, can't zero MAC if spoofcheck is on. Fixes: 8f7ba3ca12f6 ('net/mlx4: Add set VF mac address support') Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 2 +- include/linux/mlx4/driver.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 1f3568694a57..7b74afcbbab2 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -308,7 +308,7 @@ int mlx4_get_counter_stats(struct mlx4_dev *dev, int counter_index, int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); -int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); +int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index bd0e7075ea6d..e965e5090d96 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -104,4 +104,14 @@ static inline u64 mlx4_mac_to_u64(u8 *addr) return mac; } +static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) +{ + int i; + + for (i = ETH_ALEN; i > 0; i--) { + addr[i - 1] = mac && 0xFF; + mac >>= 8; + } +} + #endif /* MLX4_DRIVER_H */ -- cgit v1.2.3 From 7d6d15789d69856f1c5405e106a773c87277eb8c Mon Sep 17 00:00:00 2001 From: Scott Bauer Date: Wed, 22 Feb 2017 10:15:06 -0700 Subject: block/sed-opal: Introduce free_opal_dev to free the structure and clean up state Before we free the opal structure we need to clean up any saved locking ranges that the user had told us to unlock from a suspend. Signed-off-by: Scott Bauer Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index deee23d012e7..04b124fca51e 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -27,6 +27,7 @@ typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer, size_t len, bool send); #ifdef CONFIG_BLK_SED_OPAL +void free_opal_dev(struct opal_dev *dev); bool opal_unlock_from_suspend(struct opal_dev *dev); struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); @@ -51,6 +52,10 @@ static inline bool is_sed_ioctl(unsigned int cmd) return false; } #else +static inline void free_opal_dev(struct opal_dev *dev) +{ +} + static inline bool is_sed_ioctl(unsigned int cmd) { return false; -- cgit v1.2.3 From da55f2cc78418dee88400aafbbaed19d7ac8188e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 22 Feb 2017 10:58:29 -0800 Subject: blk-mq: use sbq wait queues instead of restart for driver tags Commit 50e1dab86aa2 ("blk-mq-sched: fix starvation for multiple hardware queues and shared tags") fixed one starvation issue for shared tags. However, we can still get into a situation where we fail to allocate a tag because all tags are allocated but we don't have any pending requests on any hardware queue. One solution for this would be to restart all queues that share a tag map, but that really sucks. Ideally, we could just block and wait for a tag, but that isn't always possible from blk_mq_dispatch_rq_list(). However, we can still use the struct sbitmap_queue wait queues with a custom callback instead of blocking. This has a few benefits: 1. It avoids iterating over all hardware queues when completing an I/O, which the current restart code has to do. 2. It benefits from the existing rolling wakeup code. 3. It avoids punting to another thread just to have it block. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8e4df3d6c8cd..001d30d727c5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -33,6 +33,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; + wait_queue_t dispatch_wait; atomic_t wait_index; struct blk_mq_tags *tags; @@ -160,6 +161,7 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, + BLK_MQ_S_TAG_WAITING = 3, BLK_MQ_MAX_DEPTH = 10240, -- cgit v1.2.3 From d08d1b27fe2a7f6923952613f5fab56ae47a6f5b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2017 13:58:52 +0530 Subject: PM / QoS: Remove global notifiers They were never used in the kernel, so get rid of them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d4d34791e463..3e2547d6e207 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -199,12 +197,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } -static inline int dev_pm_qos_add_global_notifier( - struct notifier_block *notifier) - { return 0; } -static inline int dev_pm_qos_remove_global_notifier( - struct notifier_block *notifier) - { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; -- cgit v1.2.3 From 29dee3c03abce04cd527878ef5f9e5f91b7b83f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Feb 2017 16:27:52 +0100 Subject: locking/refcounts: Out-of-line everything Linus asked to please make this real C code. And since size then isn't an issue what so ever anymore, remove the debug knob and make all WARN()s unconditional. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dwindsor@gmail.com Cc: elena.reshetova@intel.com Cc: gregkh@linuxfoundation.org Cc: ishkamiel@gmail.com Cc: keescook@chromium.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 277 ++--------------------------------------------- 1 file changed, 12 insertions(+), 265 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 600aadf9cca4..0e8cfb2ce91e 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,55 +1,10 @@ #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H -/* - * Variant of atomic_t specialized for reference counts. - * - * The interface matches the atomic_t interface (to aid in porting) but only - * provides the few functions one should use for reference counting. - * - * It differs in that the counter saturates at UINT_MAX and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free issues. - * - * Memory ordering rules are slightly relaxed wrt regular atomic_t functions - * and provide only what is strictly required for refcounts. - * - * The increments are fully relaxed; these will not provide ordering. The - * rationale is that whatever is used to obtain the object we're increasing the - * reference count on will provide the ordering. For locked data structures, - * its the lock acquire, for RCU/lockless data structures its the dependent - * load. - * - * Do note that inc_not_zero() provides a control dependency which will order - * future stores against the inc, this ensures we'll never modify the object - * if we did not in fact acquire a reference. - * - * The decrements will provide release order, such that all the prior loads and - * stores will be issued before, it also provides a control dependency, which - * will order us against the subsequent free(). - * - * The control dependency is against the load of the cmpxchg (ll/sc) that - * succeeded. This means the stores aren't fully ordered, but this is fine - * because the 1->0 transition indicates no concurrency. - * - * Note that the allocator is responsible for ordering things between free() - * and alloc(). - * - */ - #include -#include #include #include -#ifdef CONFIG_DEBUG_REFCOUNT -#define REFCOUNT_WARN(cond, str) WARN_ON(cond) -#define __refcount_check __must_check -#else -#define REFCOUNT_WARN(cond, str) (void)(cond) -#define __refcount_check -#endif - typedef struct refcount_struct { atomic_t refs; } refcount_t; @@ -66,229 +21,21 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __refcount_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - if (!val) - return false; - - if (unlikely(val == UINT_MAX)) - return true; - - new = val + i; - if (new < val) - new = UINT_MAX; - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} - -static inline void refcount_add(unsigned int i, refcount_t *r) -{ - REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); -} - -/* - * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - */ -static inline __refcount_check -bool refcount_inc_not_zero(refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - new = val + 1; - - if (!val) - return false; - - if (unlikely(!new)) - return true; - - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} - -/* - * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller already has a - * reference on the object, will WARN when this is not so. - */ -static inline void refcount_inc(refcount_t *r) -{ - REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); -} - -/* - * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. - */ -static inline __refcount_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - if (unlikely(val == UINT_MAX)) - return false; - - new = val - i; - if (new > val) { - REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); - return false; - } - - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - return !new; -} - -static inline __refcount_check -bool refcount_dec_and_test(refcount_t *r) -{ - return refcount_sub_and_test(1, r); -} +extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); +extern void refcount_add(unsigned int i, refcount_t *r); -/* - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. - */ -static inline -void refcount_dec(refcount_t *r) -{ - REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); -} - -/* - * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the - * success thereof. - * - * Like all decrement operations, it provides release memory order and provides - * a control dependency. - * - * It can be used like a try-delete operator; this explicit case is provided - * and not cmpxchg in generic, because that would allow implementing unsafe - * operations. - */ -static inline __refcount_check -bool refcount_dec_if_one(refcount_t *r) -{ - return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; -} - -/* - * No atomic_t counterpart, it decrements unless the value is 1, in which case - * it will return false. - * - * Was often done like: atomic_add_unless(&var, -1, 1) - */ -static inline __refcount_check -bool refcount_dec_not_one(refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); +extern __must_check bool refcount_inc_not_zero(refcount_t *r); +extern void refcount_inc(refcount_t *r); - for (;;) { - if (unlikely(val == UINT_MAX)) - return true; +extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); +extern void refcount_sub(unsigned int i, refcount_t *r); - if (val == 1) - return false; +extern __must_check bool refcount_dec_and_test(refcount_t *r); +extern void refcount_dec(refcount_t *r); - new = val - 1; - if (new > val) { - REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); - return true; - } - - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - return true; -} - -/* - * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail - * to decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. - */ -static inline __refcount_check -bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) -{ - if (refcount_dec_not_one(r)) - return false; - - mutex_lock(lock); - if (!refcount_dec_and_test(r)) { - mutex_unlock(lock); - return false; - } - - return true; -} - -/* - * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. - */ -static inline __refcount_check -bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) -{ - if (refcount_dec_not_one(r)) - return false; - - spin_lock(lock); - if (!refcount_dec_and_test(r)) { - spin_unlock(lock); - return false; - } - - return true; -} +extern __must_check bool refcount_dec_if_one(refcount_t *r); +extern __must_check bool refcount_dec_not_one(refcount_t *r); +extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); +extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); #endif /* _LINUX_REFCOUNT_H */ -- cgit v1.2.3 From 318b1dedcd39012624f466d281627553e9fa2570 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 23 Feb 2017 15:09:34 +0200 Subject: locking/refcounts: Add missing kernel.h header to have UINT_MAX defined Fix header dependency. Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1487855374-21993-1-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0e8cfb2ce91e..0023fee4bbbc 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -4,6 +4,7 @@ #include #include #include +#include typedef struct refcount_struct { atomic_t refs; -- cgit v1.2.3 From d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 21 Feb 2017 15:35:32 -0600 Subject: objtool: Improve detection of BUG() and other dead ends The BUG() macro's use of __builtin_unreachable() via the unreachable() macro tells gcc that the instruction is a dead end, and that it's safe to assume the current code path will not execute past the previous instruction. On x86, the BUG() macro is implemented with the 'ud2' instruction. When objtool's branch analysis sees that instruction, it knows the current code path has come to a dead end. Peter Zijlstra has been working on a patch to change the WARN macros to use 'ud2'. That patch will break objtool's assumption that 'ud2' is always a dead end. Generally it's best for objtool to avoid making those kinds of assumptions anyway. The more ignorant it is of kernel code internals, the better. So create a more generic way for objtool to detect dead ends by adding an annotation to the unreachable() macro. The annotation stores a pointer to the end of the unreachable code path in an '__unreachable' section. Objtool can read that section to find the dead ends. Tested-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..8ea159fc489d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -195,6 +195,17 @@ #endif #endif +#ifdef CONFIG_STACK_VALIDATION +#define annotate_unreachable() ({ \ + asm("1:\t\n" \ + ".pushsection __unreachable, \"a\"\t\n" \ + ".long 1b\t\n" \ + ".popsection\t\n"); \ +}) +#else +#define annotate_unreachable() +#endif + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -204,7 +215,7 @@ * this in the preprocessor, but we can live with this because they're * unreleased. Really, we need to have autoconf for the kernel. */ -#define unreachable() __builtin_unreachable() +#define unreachable() annotate_unreachable(); __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -- cgit v1.2.3 From b18b9550e4059ceea0393c518eb323b95243f92f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 11 Feb 2017 18:46:08 +0100 Subject: libceph: get rid of ack vs commit - CEPH_OSD_FLAG_ACK shouldn't be set anymore, so assert on it - remove support for handling ack replies (OSDs will send ack replies only if clients request them) - drop the "do lingering callbacks under osd->lock" logic from handle_reply() -- lreq->lock is sufficient in all three cases Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03a6653d329a..2ea0c282f3dc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -22,7 +22,6 @@ struct ceph_osd_client; * completion callback for async writepages */ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); -typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); #define CEPH_HOMELESS_OSD -1 @@ -170,15 +169,12 @@ struct ceph_osd_request { unsigned int r_num_ops; int r_result; - bool r_got_reply; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion; - struct completion r_done_completion; /* fsync waiter */ + struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; - ceph_osdc_unsafe_callback_t r_unsafe_callback; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ -- cgit v1.2.3 From 05a45a2db42543c5f1a32e08f545aebbd7cb4790 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:22 -0500 Subject: sunrpc: turn bitfield flags in svc_version into bools It's just simpler to read this way, IMO. Also, no need to explicitly set vs_hidden to false in the nfsacl ones. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7321ae933867..96467c95f02e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -400,10 +400,11 @@ struct svc_version { struct svc_procedure * vs_proc; /* per-procedure info */ u32 vs_xdrsize; /* xdrsize needed for this version */ - unsigned int vs_hidden : 1, /* Don't register with portmapper. - * Only used for nfsacl so far. */ - vs_rpcb_optnl:1;/* Don't care the result of register. - * Only used for nfsv4. */ + /* Don't register with rpcbind */ + bool vs_hidden; + + /* Don't care if the rpcbind registration fails */ + bool vs_rpcb_optnl; /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. -- cgit v1.2.3 From 362142b25843fb059941aaa01b91501d5d8652cc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:23 -0500 Subject: sunrpc: flag transports as having congestion control NFSv4 requires a transport protocol with congestion control in most cases. On an IP network, that means that NFSv4 over UDP should be forbidden. The situation with RDMA is a bit more nuanced, but most RDMA transports are suitable for this. For now, we assume that all RDMA transports are suitable, but we may need to revise that at some point. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7440290f64ac..ddb7f94a9d06 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -67,6 +67,7 @@ struct svc_xprt { #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ +#define XPT_CONG_CTRL 14 /* has congestion control */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ -- cgit v1.2.3 From bb292ac1c6028344013309a309b44dc691581825 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 25 Jan 2017 14:21:10 -0800 Subject: watchdog: Introduce watchdog_stop_on_unregister helper Many watchdog drivers explicitly stop the watchdog when unregistering it. While it is unclear if this is actually needed (the whatdog should not be running at that time if it can be stopped), introduce a helper to explicitly stop the watchdog in the watchdog core when unregistering it. This helps reducing driver code size while retaining functionality. Signed-off-by: Guenter Roeck --- include/linux/watchdog.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 35a4d8185b51..a786e5e8973b 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -117,6 +117,7 @@ struct watchdog_device { #define WDOG_NO_WAY_OUT 1 /* Is 'nowayout' feature set ? */ #define WDOG_STOP_ON_REBOOT 2 /* Should be stopped on reboot */ #define WDOG_HW_RUNNING 3 /* True if HW watchdog running */ +#define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */ struct list_head deferred; }; @@ -151,6 +152,12 @@ static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd) set_bit(WDOG_STOP_ON_REBOOT, &wdd->status); } +/* Use the following function to stop the watchdog when unregistering it */ +static inline void watchdog_stop_on_unregister(struct watchdog_device *wdd) +{ + set_bit(WDOG_STOP_ON_UNREGISTER, &wdd->status); +} + /* Use the following function to check if a timeout value is invalid */ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) { -- cgit v1.2.3 From 5283b03ee5cd28d516646298bead09b238d92ddc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:24 -0500 Subject: nfs/nfsd/sunrpc: enforce transport requirements for NFSv4 NFSv4 requires a transport "that is specified to avoid network congestion" (RFC 7530, section 3.1, paragraph 2). In practical terms, that means that you should not run NFSv4 over UDP. The server has never enforced that requirement, however. This patchset fixes this by adding a new flag to the svc_version that states that it has these transport requirements. With that, we can check that the transport has XPT_CONG_CTRL set before processing an RPC. If it doesn't we reject it with RPC_PROG_MISMATCH. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 96467c95f02e..e770abeed32d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -406,6 +406,9 @@ struct svc_version { /* Don't care if the rpcbind registration fails */ bool vs_rpcb_optnl; + /* Need xprt with congestion control */ + bool vs_need_cong_ctrl; + /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. * vs_dispatch == NULL means use default dispatcher. -- cgit v1.2.3 From 3fc21924100b13f73c734d0ce8dfcfe913fcf7a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Feb 2017 14:55:48 -0800 Subject: mm: validate device_hotplug is held for memory hotplug mem_hotplug_begin() assumes that it can set mem_hotplug.active_writer and run the hotplug process without racing another thread. Validate this assumption with a lockdep assertion. Link: http://lkml.kernel.org/r/148693886229.16345.1770484669403334689.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Ben Hutchings Cc: Michal Hocko Cc: Toshi Kani Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Greg Kroah-Hartman Cc: Masayoshi Mizuma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bd684fc8ec1d..a48a7ff70164 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1139,6 +1139,7 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); +void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 0262d9c845ec349edf93f69688a5129c36cc2232 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2017 14:55:59 -0800 Subject: memblock: embed memblock type name within struct memblock_type Provide the name of each memblock type with struct memblock_type. This allows to get rid of the function memblock_type_name() and duplicating the type names in __memblock_dump_all(). The only memblock_type usage out of mm/memblock.c seems to be arch/s390/kernel/crash_dump.c. While at it, give it a name. Link: http://lkml.kernel.org/r/20170120123456.46508-4-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Cc: Philipp Hachtmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 38bcf00cbed3..bdfc65af4152 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,6 +42,7 @@ struct memblock_type { unsigned long max; /* size of the allocated array */ phys_addr_t total_size; /* size of all regions */ struct memblock_region *regions; + char *name; }; struct memblock { -- cgit v1.2.3 From d811914d87576c562e849c00d9f9beff45038801 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 24 Feb 2017 14:56:02 -0800 Subject: userfaultfd: non-cooperative: rename *EVENT_MADVDONTNEED to *EVENT_REMOVE Patch series "userfaultfd: non-cooperative: add madvise() event for MADV_REMOVE request". These patches add notification of madvise(MADV_REMOVE) event to non-cooperative userfaultfd monitor. The first pacth renames EVENT_MADVDONTNEED to EVENT_REMOVE along with relevant functions and structures. Using _REMOVE instead of _MADVDONTNEED describes the event semantics more clearly and I hope it's not too late for such change in the ABI. This patch (of 3): The UFFD_EVENT_MADVDONTNEED purpose is to notify uffd monitor about removal of certain range from address space tracked by userfaultfd. Hence, UFFD_EVENT_REMOVE seems to better reflect the operation semantics. Respectively, 'madv_dn' field of uffd_msg is renamed to 'remove' and the madvise_userfault_dontneed callback is renamed to userfaultfd_remove. Link: http://lkml.kernel.org/r/1484814154-1557-2-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Acked-by: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f431861f22f1..2521542f6c07 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,10 +61,10 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void madvise_userfault_dontneed(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, - unsigned long end); +extern void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end); #else /* CONFIG_USERFAULTFD */ @@ -112,10 +112,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void madvise_userfault_dontneed(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, - unsigned long end) +static inline void userfaultfd_remove(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, + unsigned long end) { } #endif /* CONFIG_USERFAULTFD */ -- cgit v1.2.3 From 1276ad68e2491d1ceeb65f55d790f9277593c459 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 24 Feb 2017 14:56:11 -0800 Subject: mm: vmscan: scan dirty pages even in laptop mode Patch series "mm: vmscan: fix kswapd writeback regression". We noticed a regression on multiple hadoop workloads when moving from 3.10 to 4.0 and 4.6, which involves kswapd getting tangled up in page writeout, causing direct reclaim herds that also don't make progress. I tracked it down to the thrash avoidance efforts after 3.10 that make the kernel better at keeping use-once cache and use-many cache sorted on the inactive and active list, with more aggressive protection of the active list as long as there is inactive cache. Unfortunately, our workload's use-once cache is mostly from streaming writes. Waiting for writes to avoid potential reloads in the future is not a good tradeoff. These patches do the following: 1. Wake the flushers when kswapd sees a lump of dirty pages. It's possible to be below the dirty background limit and still have cache velocity push them through the LRU. So start a-flushin'. 2. Let kswapd only write pages that have been rotated twice. This makes sure we really tried to get all the clean pages on the inactive list before resorting to horrible LRU-order writeback. 3. Move rotating dirty pages off the inactive list. Instead of churning or waiting on page writeback, we'll go after clean active cache. This might lead to thrashing, but in this state memory demand outstrips IO speed anyway, and reads are faster than writes. Mel backported the series to 4.10-rc5 with one minor conflict and ran a couple of tests on it. Mix of read/write random workload didn't show anything interesting. Write-only database didn't show much difference in performance but there were slight reductions in IO -- probably in the noise. simoop did show big differences although not as big as Mel expected. This is Chris Mason's workload that similate the VM activity of hadoop. Mel won't go through the full details but over the samples measured during an hour it reported 4.10.0-rc5 4.10.0-rc5 vanilla johannes-v1r1 Amean p50-Read 21346531.56 ( 0.00%) 21697513.24 ( -1.64%) Amean p95-Read 24700518.40 ( 0.00%) 25743268.98 ( -4.22%) Amean p99-Read 27959842.13 ( 0.00%) 28963271.11 ( -3.59%) Amean p50-Write 1138.04 ( 0.00%) 989.82 ( 13.02%) Amean p95-Write 1106643.48 ( 0.00%) 12104.00 ( 98.91%) Amean p99-Write 1569213.22 ( 0.00%) 36343.38 ( 97.68%) Amean p50-Allocation 85159.82 ( 0.00%) 79120.70 ( 7.09%) Amean p95-Allocation 204222.58 ( 0.00%) 129018.43 ( 36.82%) Amean p99-Allocation 278070.04 ( 0.00%) 183354.43 ( 34.06%) Amean final-p50-Read 21266432.00 ( 0.00%) 21921792.00 ( -3.08%) Amean final-p95-Read 24870912.00 ( 0.00%) 26116096.00 ( -5.01%) Amean final-p99-Read 28147712.00 ( 0.00%) 29523968.00 ( -4.89%) Amean final-p50-Write 1130.00 ( 0.00%) 977.00 ( 13.54%) Amean final-p95-Write 1033216.00 ( 0.00%) 2980.00 ( 99.71%) Amean final-p99-Write 1517568.00 ( 0.00%) 32672.00 ( 97.85%) Amean final-p50-Allocation 86656.00 ( 0.00%) 78464.00 ( 9.45%) Amean final-p95-Allocation 211712.00 ( 0.00%) 116608.00 ( 44.92%) Amean final-p99-Allocation 287232.00 ( 0.00%) 168704.00 ( 41.27%) The latencies are actually completely horrific in comparison to 4.4 (and 4.10-rc5 is worse than 4.9 according to historical data for reasons Mel hasn't analysed yet). Still, 95% of write latency (p95-write) is halved by the series and allocation latency is way down. Direct reclaim activity is one fifth of what it was according to vmstats. Kswapd activity is higher but this is not necessarily surprising. Kswapd efficiency is unchanged at 99% (99% of pages scanned were reclaimed) but direct reclaim efficiency went from 77% to 99% In the vanilla kernel, 627MB of data was written back from reclaim context. With the series, no data was written back. With or without the patch, pages are being immediately reclaimed after writeback completes. However, with the patch, only 1/8th of the pages are reclaimed like this. This patch (of 5): We have an elaborate dirty/writeback throttling mechanism inside the reclaim scanner, but for that to work the pages have to go through shrink_page_list() and get counted for what they are. Otherwise, we mess up the LRU order and don't match reclaim speed to writeback. Especially during deactivation, there is never a reason to skip dirty pages; nothing is even trying to write them out from there. Don't mess up the LRU order for nothing, shuffle these pages along. Link: http://lkml.kernel.org/r/20170123181641.23938-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 82fc632fd11d..8e02b3750fe0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -236,8 +236,6 @@ struct lruvec { #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) -/* Isolate clean file */ -#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1) /* Isolate unmapped file */ #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ -- cgit v1.2.3 From 726d061fbd3658e4bfeffa1b8e82da97de2ca4dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 24 Feb 2017 14:56:14 -0800 Subject: mm: vmscan: kick flushers when we encounter dirty pages on the LRU Memory pressure can put dirty pages at the end of the LRU without anybody running into dirty limits. Don't start writing individual pages from kswapd while the flushers might be asleep. Unlike the old direct reclaim flusher wakeup (removed in the next patch) that flushes the number of pages just scanned, this patch wakes the flushers for all outstanding dirty pages. That seemed to perform better in a synthetic test that pushes dirty pages to the end of the LRU and into reclaim, because we know LRU aging outstrips writeback already, and this way we give younger dirty pages a headstart rather than wait until reclaim runs into them as well. It also means less plugging and risk of exhausting the struct request pool from reclaim. There is a concern that this will cause temporary files that used to get dirtied and truncated before writeback to now get written to disk under memory pressure. If this turns out to be a real problem, we'll have to revisit this and tame the reclaim flusher wakeups. [hannes@cmpxchg.org: mention dirty expiration as a condition] Link: http://lkml.kernel.org/r/20170126174739.GA30636@cmpxchg.org Link: http://lkml.kernel.org/r/20170123181641.23938-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Hillf Danton Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5527d910ba3d..a3c0cbd7c888 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -46,7 +46,7 @@ enum writeback_sync_modes { */ enum wb_reason { WB_REASON_BACKGROUND, - WB_REASON_TRY_TO_FREE_PAGES, + WB_REASON_VMSCAN, WB_REASON_SYNC, WB_REASON_PERIODIC, WB_REASON_LAPTOP_TIMER, -- cgit v1.2.3 From c55e8d035b28b2867e68b0e2d0eee2c0f1016b43 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 24 Feb 2017 14:56:23 -0800 Subject: mm: vmscan: move dirty pages out of the way until they're flushed We noticed a performance regression when moving hadoop workloads from 3.10 kernels to 4.0 and 4.6. This is accompanied by increased pageout activity initiated by kswapd as well as frequent bursts of allocation stalls and direct reclaim scans. Even lowering the dirty ratios to the equivalent of less than 1% of memory would not eliminate the issue, suggesting that dirty pages concentrate where the scanner is looking. This can be traced back to recent efforts of thrash avoidance. Where 3.10 would not detect refaulting pages and continuously supply clean cache to the inactive list, a thrashing workload on 4.0+ will detect and activate refaulting pages right away, distilling used-once pages on the inactive list much more effectively. This is by design, and it makes sense for clean cache. But for the most part our workload's cache faults are refaults and its use-once cache is from streaming writes. We end up with most of the inactive list dirty, and we don't go after the active cache as long as we have use-once pages around. But waiting for writes to avoid reclaiming clean cache that *might* refault is a bad trade-off. Even if the refaults happen, reads are faster than writes. Before getting bogged down on writeback, reclaim should first look at *all* cache in the system, even active cache. To accomplish this, activate pages that are dirty or under writeback when they reach the end of the inactive LRU. The pages are marked for immediate reclaim, meaning they'll get moved back to the inactive LRU tail as soon as they're written back and become reclaimable. But in the meantime, by reducing the inactive list to only immediately reclaimable pages, we allow the scanner to deactivate and refill the inactive list with clean cache from the active list tail to guarantee forward progress. [hannes@cmpxchg.org: update comment] Link: http://lkml.kernel.org/r/20170202191957.22872-8-hannes@cmpxchg.org Link: http://lkml.kernel.org/r/20170123181641.23938-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Acked-by: Hillf Danton Cc: Mel Gorman Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 41d376e7116d..e030a68ead7e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -50,6 +50,13 @@ static __always_inline void add_page_to_lru_list(struct page *page, list_add(&page->lru, &lruvec->lists[lru]); } +static __always_inline void add_page_to_lru_list_tail(struct page *page, + struct lruvec *lruvec, enum lru_list lru) +{ + update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); + list_add_tail(&page->lru, &lruvec->lists[lru]); +} + static __always_inline void del_page_from_lru_list(struct page *page, struct lruvec *lruvec, enum lru_list lru) { -- cgit v1.2.3 From 11bac80004499ea59f361ef2a5516c84b6eab675 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 24 Feb 2017 14:56:41 -0800 Subject: mm, fs: reduce fault, page_mkwrite, and pfn_mkwrite to take only vmf ->fault(), ->page_mkwrite(), and ->pfn_mkwrite() calls do not need to take a vma and vmf parameter when the vma already resides in vmf. Remove the vma parameter to simplify things. [arnd@arndb.de: fix ARM build] Link: http://lkml.kernel.org/r/20170125223558.1451224-1-arnd@arndb.de Link: http://lkml.kernel.org/r/148521301778.19116.10840599906674778980.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dave Jiang Signed-off-by: Arnd Bergmann Reviewed-by: Ross Zwisler Cc: Theodore Ts'o Cc: Darrick J. Wong Cc: Matthew Wilcox Cc: Dave Hansen Cc: Christoph Hellwig Cc: Jan Kara Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 5 ++--- include/linux/iomap.h | 3 +-- include/linux/mm.h | 10 +++++----- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 1e77ff5818f1..eeb02421c848 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -38,8 +38,7 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, - const struct iomap_ops *ops); +int dax_iomap_fault(struct vm_fault *vmf, const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, @@ -83,7 +82,7 @@ static inline int dax_iomap_pmd_fault(struct vm_fault *vmf, return VM_FAULT_FALLBACK; } #endif -int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); +int dax_pfn_mkwrite(struct vm_fault *vmf); static inline bool vma_is_dax(struct vm_area_struct *vma) { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 891459caa278..7291810067eb 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -79,8 +79,7 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); -int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - const struct iomap_ops *ops); +int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, const struct iomap_ops *ops); diff --git a/include/linux/mm.h b/include/linux/mm.h index 574bc157a27c..3dd80ba6568a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -350,17 +350,17 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); - int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*fault)(struct vm_fault *vmf); int (*pmd_fault)(struct vm_fault *vmf); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ - int (*page_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*page_mkwrite)(struct vm_fault *vmf); /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ - int (*pfn_mkwrite)(struct vm_area_struct *vma, struct vm_fault *vmf); + int (*pfn_mkwrite)(struct vm_fault *vmf); /* called by access_process_vm when get_user_pages() fails, typically * for use by special VMAs that can switch between memory and hardware @@ -2124,10 +2124,10 @@ extern void truncate_inode_pages_range(struct address_space *, extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ -extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); +extern int filemap_fault(struct vm_fault *vmf); extern void filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int filemap_page_mkwrite(struct vm_fault *vmf); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); -- cgit v1.2.3 From a2d581675d485eb7188f521f36efc114639a3096 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 24 Feb 2017 14:56:59 -0800 Subject: mm,fs,dax: change ->pmd_fault to ->huge_fault Patch series "1G transparent hugepage support for device dax", v2. The following series implements support for 1G trasparent hugepage on x86 for device dax. The bulk of the code was written by Mathew Wilcox a while back supporting transparent 1G hugepage for fs DAX. I have forward ported the relevant bits to 4.10-rc. The current submission has only the necessary code to support device DAX. Comments from Dan Williams: So the motivation and intended user of this functionality mirrors the motivation and users of 1GB page support in hugetlbfs. Given expected capacities of persistent memory devices an in-memory database may want to reduce tlb pressure beyond what they can already achieve with 2MB mappings of a device-dax file. We have customer feedback to that effect as Willy mentioned in his previous version of these patches [1]. [1]: https://lkml.org/lkml/2016/1/31/52 Comments from Nilesh @ Oracle: There are applications which have a process model; and if you assume 10,000 processes attempting to mmap all the 6TB memory available on a server; we are looking at the following: processes : 10,000 memory : 6TB pte @ 4k page size: 8 bytes / 4K of memory * #processes = 6TB / 4k * 8 * 10000 = 1.5GB * 80000 = 120,000GB pmd @ 2M page size: 120,000 / 512 = ~240GB pud @ 1G page size: 240GB / 512 = ~480MB As you can see with 2M pages, this system will use up an exorbitant amount of DRAM to hold the page tables; but the 1G pages finally brings it down to a reasonable level. Memory sizes will keep increasing; so this number will keep increasing. An argument can be made to convert the applications from process model to thread model, but in the real world that may not be always practical. Hopefully this helps explain the use case where this is valuable. This patch (of 3): In preparation for adding the ability to handle PUD pages, convert vm_operations_struct.pmd_fault to vm_operations_struct.huge_fault. The vm_fault structure is extended to include a union of the different page table pointers that may be needed, and three flag bits are reserved to indicate which type of pointer is in the union. [ross.zwisler@linux.intel.com: remove unused function ext4_dax_huge_fault()] Link: http://lkml.kernel.org/r/1485813172-7284-1-git-send-email-ross.zwisler@linux.intel.com [dave.jiang@intel.com: clear PMD or PUD size flags when in fall through path] Link: http://lkml.kernel.org/r/148589842696.5820.16078080610311444794.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545058784.17912.6353162518188733642.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox Signed-off-by: Dave Jiang Signed-off-by: Ross Zwisler Cc: Dave Hansen Cc: Vlastimil Babka Cc: Jan Kara Cc: Dan Williams Cc: Kirill A. Shutemov Cc: Nilesh Choudhury Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Dave Jiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 6 ------ include/linux/mm.h | 10 +++++++++- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index eeb02421c848..cf9af225962b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -70,17 +70,11 @@ static inline unsigned int dax_radix_order(void *entry) return PMD_SHIFT - PAGE_SHIFT; return 0; } -int dax_iomap_pmd_fault(struct vm_fault *vmf, const struct iomap_ops *ops); #else static inline unsigned int dax_radix_order(void *entry) { return 0; } -static inline int dax_iomap_pmd_fault(struct vm_fault *vmf, - const struct iomap_ops *ops) -{ - return VM_FAULT_FALLBACK; -} #endif int dax_pfn_mkwrite(struct vm_fault *vmf); diff --git a/include/linux/mm.h b/include/linux/mm.h index 3dd80ba6568a..035a688e5472 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,6 +285,11 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +#define FAULT_FLAG_SIZE_MASK 0x7000 /* Support up to 8-level page tables */ +#define FAULT_FLAG_SIZE_PTE 0x0000 /* First level (eg 4k) */ +#define FAULT_FLAG_SIZE_PMD 0x1000 /* Second level (eg 2MB) */ +#define FAULT_FLAG_SIZE_PUD 0x2000 /* Third level (eg 1GB) */ + #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ @@ -314,6 +319,9 @@ struct vm_fault { unsigned long address; /* Faulting virtual address */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' */ + pud_t *pud; /* Pointer to pud entry matching + * the 'address' + */ pte_t orig_pte; /* Value of PTE at the time of fault */ struct page *cow_page; /* Page handler may use for COW fault */ @@ -351,7 +359,7 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); - int (*pmd_fault)(struct vm_fault *vmf); + int (*huge_fault)(struct vm_fault *vmf); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -- cgit v1.2.3 From a00cc7d9dd93d66a3fb83fc52aa57a4bec51c517 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Feb 2017 14:57:02 -0800 Subject: mm, x86: add support for PUD-sized transparent hugepages The current transparent hugepage code only supports PMDs. This patch adds support for transparent use of PUDs with DAX. It does not include support for anonymous pages. x86 support code also added. Most of this patch simply parallels the work that was done for huge PMDs. The only major difference is how the new ->pud_entry method in mm_walk works. The ->pmd_entry method replaces the ->pte_entry method, whereas the ->pud_entry method works along with either ->pmd_entry or ->pte_entry. The pagewalk code takes care of locking the PUD before calling ->pud_walk, so handlers do not need to worry whether the PUD is stable. [dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()] Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com [dave.jiang@intel.com: native_pud_clear missing on i386 build] Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox Signed-off-by: Dave Jiang Tested-by: Alexander Kapshuk Cc: Dave Hansen Cc: Vlastimil Babka Cc: Jan Kara Cc: Dan Williams Cc: Ross Zwisler Cc: Kirill A. Shutemov Cc: Nilesh Choudhury Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 83 +++++++++++++++++++++++++++++++++++++++++--- include/linux/mm.h | 30 +++++++++++++++- include/linux/mmu_notifier.h | 14 ++++++++ include/linux/pfn_t.h | 12 +++++++ 4 files changed, 133 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f0029e786205..a3762d49ba39 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma); extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); +extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, unsigned long addr, + struct vm_area_struct *vma); + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); +#else +static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) +{ +} +#endif + extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb, extern int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); +extern int zap_huge_pud(struct mmu_gather *tlb, + struct vm_area_struct *vma, + pud_t *pud, unsigned long addr); extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec); @@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, - pfn_t pfn, bool write); +int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, pfn_t pfn, bool write); +int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, pfn_t pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, @@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_NR (1<vm_mm->mmap_sem), vma); + if (pud_trans_huge(*pud) || pud_devmap(*pud)) + return __pud_trans_huge_lock(pud, vma); + else + return NULL; +} static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page) return 1; } +struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, int flags); +struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, int flags); + extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; @@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return is_huge_zero_page(pmd_page(pmd)); } +static inline bool is_huge_zero_pud(pud_t pud) +{ + return false; +} + struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); @@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm); #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) + #define hpage_nr_pages(x) 1 #define transparent_hugepage_enabled(__vma) 0 @@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) {} +#define split_huge_pud(__vma, __pmd, __address) \ + do { } while (0) + static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { @@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, { return NULL; } +static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, + struct vm_area_struct *vma) +{ + return NULL; +} static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd) { @@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline bool is_huge_zero_pud(pud_t pud) +{ + return false; +} + static inline void mm_put_huge_zero_page(struct mm_struct *mm) { return; @@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, { return NULL; } + +static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, + unsigned long addr, pud_t *pud, int flags) +{ + return NULL; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 035a688e5472..d8b75d7d6a9e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd) { return 0; } +static inline int pud_devmap(pud_t pud) +{ + return 0; +} #endif /* @@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, /** * mm_walk - callbacks for walk_page_range + * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry + * this handler should only handle pud_trans_huge() puds. + * the pmd_entry or pte_entry callbacks will be used for + * regular PUDs. * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to @@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * (see the comment on walk_page_range() for more details) */ struct mm_walk { + int (*pud_entry)(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pte_entry)(pte_t *pte, unsigned long addr, @@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -extern void __init pagecache_init(void); +/* + * No scalability reason to split PUD locks yet, but follow the same pattern + * as the PMD locks to make it easier if we decide to. The VM should not be + * considered ready to switch to split PUD locks yet; there may be places + * which need to be converted from page_table_lock. + */ +static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud) +{ + return &mm->page_table_lock; +} + +static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) +{ + spinlock_t *ptl = pud_lockptr(mm, pud); + + spin_lock(ptl); + return ptl; +} +extern void __init pagecache_init(void); extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a1a210d59961..51891fb0d3ce 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pmd; \ }) +#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pud_t ___pud; \ + \ + ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \ + mmu_notifier_invalidate_range(___mm, ___haddr, \ + ___haddr + HPAGE_PUD_SIZE); \ + \ + ___pud; \ +}) + #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ @@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush +#define pudp_huge_clear_flush_notify pudp_huge_clear_flush #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 033fc7bbcefa..a49b3259cad7 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -90,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) { return pfn_pmd(pfn_t_to_pfn(pfn), pgprot); } + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) +{ + return pfn_pud(pfn_t_to_pfn(pfn), pgprot); +} +#endif #endif #ifdef __HAVE_ARCH_PTE_DEVMAP @@ -106,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn) } pte_t pte_mkdevmap(pte_t pte); pmd_t pmd_mkdevmap(pmd_t pmd); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +pud_t pud_mkdevmap(pud_t pud); #endif +#endif /* __HAVE_ARCH_PTE_DEVMAP */ + #endif /* _LINUX_PFN_T_H_ */ -- cgit v1.2.3 From c791ace1e747371658237f0d30234fef56c39669 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 24 Feb 2017 14:57:08 -0800 Subject: mm: replace FAULT_FLAG_SIZE with parameter to huge_fault Since the introduction of FAULT_FLAG_SIZE to the vm_fault flag, it has been somewhat painful with getting the flags set and removed at the correct locations. More than one kernel oops was introduced due to difficulties of getting the placement correctly. Remove the flag values and introduce an input parameter to huge_fault that indicates the size of the page entry. This makes the code easier to trace and should avoid the issues we see with the fault flags where removal of the flag was necessary in the fallback paths. Link: http://lkml.kernel.org/r/148615748258.43180.1690152053774975329.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dave Jiang Tested-by: Dan Williams Reviewed-by: Jan Kara Cc: Matthew Wilcox Cc: Dave Hansen Cc: Vlastimil Babka Cc: Ross Zwisler Cc: Kirill A. Shutemov Cc: Nilesh Choudhury Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 3 ++- include/linux/mm.h | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index cf9af225962b..d8a3dc042e1c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -38,7 +38,8 @@ static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -int dax_iomap_fault(struct vm_fault *vmf, const struct iomap_ops *ops); +int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, diff --git a/include/linux/mm.h b/include/linux/mm.h index d8b75d7d6a9e..c65aa43b5712 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,11 +285,6 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ -#define FAULT_FLAG_SIZE_MASK 0x7000 /* Support up to 8-level page tables */ -#define FAULT_FLAG_SIZE_PTE 0x0000 /* First level (eg 4k) */ -#define FAULT_FLAG_SIZE_PMD 0x1000 /* Second level (eg 2MB) */ -#define FAULT_FLAG_SIZE_PUD 0x2000 /* Third level (eg 1GB) */ - #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ @@ -349,6 +344,13 @@ struct vm_fault { */ }; +/* page entry size for vm->huge_fault() */ +enum page_entry_size { + PE_SIZE_PTE = 0, + PE_SIZE_PMD, + PE_SIZE_PUD, +}; + /* * These are the virtual MM functions - opening of an area, closing and * unmapping it (needed to keep files on disk up-to-date etc), pointer @@ -359,7 +361,7 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); - int (*huge_fault)(struct vm_fault *vmf); + int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); void (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); -- cgit v1.2.3 From 9e5bcd610ffcedf5e485e78a72762810b25c7f25 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 24 Feb 2017 14:57:29 -0800 Subject: mm/migration: make isolate_movable_page() return int type Patch series "HWPOISON: soft offlining for non-lru movable page", v6. After Minchan's commit bda807d44454 ("mm: migrate: support non-lru movable page migration"), some type of non-lru page like zsmalloc and virtio-balloon page also support migration. Therefore, we can: 1) soft offlining no-lru movable pages, which means when memory corrected errors occur on a non-lru movable page, we can stop to use it by migrating data onto another page and disable the original (maybe half-broken) one. 2) enable memory hotplug for non-lru movable pages, i.e. we may offline blocks, which include such pages, by using non-lru page migration. This patchset is heavily dependent on non-lru movable page migration. This patch (of 4): Change the return type of isolate_movable_page() from bool to int. It will return 0 when isolate movable page successfully, and return -EBUSY when it isolates failed. There is no functional change within this patch but prepare for later patch. [xieyisheng1@huawei.com: v6] Link: http://lkml.kernel.org/r/1486108770-630-2-git-send-email-xieyisheng1@huawei.com Link: http://lkml.kernel.org/r/1485867981-16037-2-git-send-email-ysxie@foxmail.com Signed-off-by: Yisheng Xie Suggested-by: Michal Hocko Acked-by: Minchan Kim Cc: Andi Kleen Cc: Hanjun Guo Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Naoya Horiguchi Cc: Reza Arbab Cc: Taku Izumi Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ae8d475a9385..43d5deb0c4cc 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -37,7 +37,7 @@ extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); -extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); +extern int isolate_movable_page(struct page *page, isolate_mode_t mode); extern void putback_movable_page(struct page *page); extern int migrate_prep(void); -- cgit v1.2.3 From cbae0170e5329c79c0a36a81fedd2800146aca12 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 24 Feb 2017 14:57:32 -0800 Subject: mm/migration: make isolate_movable_page always defined Define isolate_movable_page as a static inline function when CONFIG_MIGRATION is not enable. It should return -EBUSY here which means failed to isolate movable pages. This patch do not have any functional change but prepare for later patch. Link: http://lkml.kernel.org/r/1485867981-16037-3-git-send-email-ysxie@foxmail.com Signed-off-by: Yisheng Xie Acked-by: Minchan Kim Suggested-by: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Cc: Andi Kleen Cc: Hanjun Guo Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Mel Gorman Cc: Reza Arbab Cc: Taku Izumi Cc: Vitaly Kuznetsov Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 43d5deb0c4cc..fa76b516fa47 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -56,6 +56,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } +static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) + { return -EBUSY; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } -- cgit v1.2.3 From ace71a19cec5eb430207c3269d8a2683f0574306 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Feb 2017 14:57:45 -0800 Subject: mm: introduce page_vma_mapped_walk() Introduce a new interface to check if a page is mapped into a vma. It aims to address shortcomings of page_check_address{,_transhuge}. Existing interface is not able to handle PTE-mapped THPs: it only finds the first PTE. The rest lefted unnoticed. page_vma_mapped_walk() iterates over all possible mapping of the page in the vma. Link: http://lkml.kernel.org/r/20170129173858.45174-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Hugh Dickins Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Srikar Dronamraju Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 15321fb1df6b..b76343610653 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -9,6 +9,7 @@ #include #include #include +#include /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -232,6 +233,31 @@ static inline bool page_check_address_transhuge(struct page *page, } #endif +/* Avoid racy checks */ +#define PVMW_SYNC (1 << 0) +/* Look for migarion entries rather than present PTEs */ +#define PVMW_MIGRATION (1 << 1) + +struct page_vma_mapped_walk { + struct page *page; + struct vm_area_struct *vma; + unsigned long address; + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + unsigned int flags; +}; + +static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) +{ + if (pvmw->pte) + pte_unmap(pvmw->pte); + if (pvmw->ptl) + spin_unlock(pvmw->ptl); +} + +bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); + /* * Used by swapoff to help locate where page is expected in vma. */ -- cgit v1.2.3 From d53a8b49a626fdfce4390710da6d04b4314db25f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Feb 2017 14:58:13 -0800 Subject: mm: drop page_check_address{,_transhuge} All users are gone. Let's drop them. Link: http://lkml.kernel.org/r/20170129173858.45174-12-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Hugh Dickins Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Srikar Dronamraju Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b76343610653..8c89e902df3e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -197,42 +197,6 @@ int page_referenced(struct page *, int is_locked, int try_to_unmap(struct page *, enum ttu_flags flags); -/* - * Used by uprobes to replace a userspace page safely - */ -pte_t *__page_check_address(struct page *, struct mm_struct *, - unsigned long, spinlock_t **, int); - -static inline pte_t *page_check_address(struct page *page, struct mm_struct *mm, - unsigned long address, - spinlock_t **ptlp, int sync) -{ - pte_t *ptep; - - __cond_lock(*ptlp, ptep = __page_check_address(page, mm, address, - ptlp, sync)); - return ptep; -} - -/* - * Used by idle page tracking to check if a page was referenced via page - * tables. - */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -bool page_check_address_transhuge(struct page *page, struct mm_struct *mm, - unsigned long address, pmd_t **pmdp, - pte_t **ptep, spinlock_t **ptlp); -#else -static inline bool page_check_address_transhuge(struct page *page, - struct mm_struct *mm, unsigned long address, - pmd_t **pmdp, pte_t **ptep, spinlock_t **ptlp) -{ - *ptep = page_check_address(page, mm, address, ptlp, 0); - *pmdp = NULL; - return !!*ptep; -} -#endif - /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) /* Look for migarion entries rather than present PTEs */ -- cgit v1.2.3 From 897ab3e0c49e24b62e2d54d165c7afec6bbca65b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 24 Feb 2017 14:58:22 -0800 Subject: userfaultfd: non-cooperative: add event for memory unmaps When a non-cooperative userfaultfd monitor copies pages in the background, it may encounter regions that were already unmapped. Addition of UFFD_EVENT_UNMAP allows the uffd monitor to track precisely changes in the virtual memory layout. Since there might be different uffd contexts for the affected VMAs, we first should create a temporary representation for the unmap event for each uffd context and then notify them one by one to the appropriate userfault file descriptors. The event notification occurs after the mmap_sem has been released. [arnd@arndb.de: fix nommu build] Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de [mhocko@suse.com: fix nommu build] Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Michal Hocko Signed-off-by: Arnd Bergmann Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 +++++++++----- include/linux/userfaultfd_k.h | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c65aa43b5712..c6fcba1d1ae5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2090,18 +2090,22 @@ extern int install_special_mapping(struct mm_struct *mm, extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate); -extern int do_munmap(struct mm_struct *, unsigned long, size_t); + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, + struct list_head *uf); +extern int do_munmap(struct mm_struct *, unsigned long, size_t, + struct list_head *uf); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate) + unsigned long pgoff, unsigned long *populate, + struct list_head *uf) { - return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); + return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf); } #ifdef CONFIG_MMU diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 2521542f6c07..a40be5d0661b 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -66,6 +66,12 @@ extern void userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf); +extern void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -118,6 +124,18 @@ static inline void userfaultfd_remove(struct vm_area_struct *vma, unsigned long end) { } + +static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf) +{ + return 0; +} + +static inline void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf) +{ +} #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From ca49ca7114553587736fe78319e22f073b631380 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 24 Feb 2017 14:58:25 -0800 Subject: userfaultfd: non-cooperative: add event for exit() notification Allow userfaultfd monitor track termination of the processes that have memory backed by the uffd. [rppt@linux.vnet.ibm.com: add comment] Link: http://lkml.kernel.org/r/20170202135448.GB19804@rapoport-lnxLink: http://lkml.kernel.org/r/1485542673-24387-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a40be5d0661b..0468548acebf 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -72,6 +72,8 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); +extern void userfaultfd_exit(struct mm_struct *mm); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -136,6 +138,11 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) { } + +static inline void userfaultfd_exit(struct mm_struct *mm) +{ +} + #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From ca96b625341027f611c3e61351a70311077ebcf5 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Feb 2017 14:58:37 -0800 Subject: mm: alloc_contig_range: allow to specify GFP mask Currently alloc_contig_range assumes that the compaction should be done with the default GFP_KERNEL flags. This is probably right for all current uses of this interface, but may change as CMA is used in more use-cases (including being the default DMA memory allocator on some platforms). Change the function prototype, to allow for passing through the GFP mask set by upper layers. Also respect global restrictions by applying memalloc_noio_flags to the passed in flags. Link: http://lkml.kernel.org/r/20170127172328.18574-1-l.stach@pengutronix.de Signed-off-by: Lucas Stach Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Radim Krcmar Cc: Catalin Marinas Cc: Will Deacon Cc: Chris Zankel Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Alexander Graf Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0fe0b6295ab5..db373b9d3223 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -541,7 +541,7 @@ static inline bool pm_suspended_storage(void) #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range(unsigned long start, unsigned long end, - unsigned migratetype); + unsigned migratetype, gfp_t gfp_mask); extern void free_contig_range(unsigned long pfn, unsigned nr_pages); #endif -- cgit v1.2.3 From e2f466e32f56c8b3ee0d1a40cc39e1c779dfd598 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Feb 2017 14:58:41 -0800 Subject: mm: cma_alloc: allow to specify GFP mask Most users of this interface just want to use it with the default GFP_KERNEL flags, but for cases where DMA memory is allocated it may be called from a different context. No functional change yet, just passing through the flag to the underlying alloc_contig_range function. Link: http://lkml.kernel.org/r/20170127172328.18574-2-l.stach@pengutronix.de Signed-off-by: Lucas Stach Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Radim Krcmar Cc: Catalin Marinas Cc: Will Deacon Cc: Chris Zankel Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Alexander Graf Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 6f0a91b37f68..03f32d0bd1d8 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -29,6 +29,7 @@ extern int __init cma_declare_contiguous(phys_addr_t base, extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align); +extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, + gfp_t gfp_mask); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); #endif -- cgit v1.2.3 From 712c604dcdf8186295e2af694adf52c6842ad100 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Feb 2017 14:58:44 -0800 Subject: mm: wire up GFP flag passing in dma_alloc_from_contiguous The callers of the DMA alloc functions already provide the proper context GFP flags. Make sure to pass them through to the CMA allocator, to make the CMA compaction context aware. Link: http://lkml.kernel.org/r/20170127172328.18574-3-l.stach@pengutronix.de Signed-off-by: Lucas Stach Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Radim Krcmar Cc: Catalin Marinas Cc: Will Deacon Cc: Chris Zankel Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Alexander Graf Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-contiguous.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index fec734df1524..b67bf6ac907d 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, } struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order); + unsigned int order, gfp_t gfp_mask); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); @@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size, static inline struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order) + unsigned int order, gfp_t gfp_mask) { return NULL; } -- cgit v1.2.3 From def5efe0376501ef7bd6b53ed061512c142e59aa Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 24 Feb 2017 14:58:47 -0800 Subject: mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count If madvise(2) advice will result in the underlying vma being split and the number of areas mapped by the process will exceed /proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN. EAGAIN is returned by madvise(2) when a kernel resource, such as slab, is temporarily unavailable. It indicates that userspace should retry the advice in the near future. This is important for advice such as MADV_DONTNEED which is often used by malloc implementations to free memory back to the system: we really do want to free memory back when madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas, or mempolicies) cannot be allocated. Encountering /proc/sys/vm/max_map_count is not a temporary failure, however, so return ENOMEM to indicate this is a more serious issue. A followup patch to the man page will specify this behavior. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1701241431120.42507@chino.kir.corp.google.com Signed-off-by: David Rientjes Cc: Jonathan Corbet Cc: Johannes Weiner Cc: Jerome Marchand Cc: "Kirill A. Shutemov" Cc: Michael Kerrisk Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fcba1d1ae5..ed233b002e33 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2041,8 +2041,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int split_vma(struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); +extern int __split_vma(struct mm_struct *, struct vm_area_struct *, + unsigned long addr, int new_below); +extern int split_vma(struct mm_struct *, struct vm_area_struct *, + unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); -- cgit v1.2.3 From 3a4f8a0b3ffa733ffbb327685e83b63383127cf6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 24 Feb 2017 14:59:36 -0800 Subject: mm: remove shmem_mapping() shmem_zero_setup() duplicates Remove the prototypes for shmem_mapping() and shmem_zero_setup() from linux/mm.h, since they are already provided in linux/shmem_fs.h. But shmem_fs.h must then provide the inline stub for shmem_mapping() when CONFIG_SHMEM is not set, and a few more cfiles now need to #include it. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1702081658250.1549@eggly.anvils Signed-off-by: Hugh Dickins Cc: Johannes Weiner Cc: Michal Simek Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 ---------- include/linux/shmem_fs.h | 7 +++++++ 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed233b002e33..0d65dd72c0f4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1168,16 +1168,6 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); -int shmem_zero_setup(struct vm_area_struct *); -#ifdef CONFIG_SHMEM -bool shmem_mapping(struct address_space *mapping); -#else -static inline bool shmem_mapping(struct address_space *mapping) -{ - return false; -} -#endif - extern bool can_do_mlock(void); extern int user_shm_lock(size_t, struct user_struct *); extern void user_shm_unlock(size_t, struct user_struct *); diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index fdaac9d4d46d..a7d6bd2a918f 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -57,7 +57,14 @@ extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +#ifdef CONFIG_SHMEM extern bool shmem_mapping(struct address_space *mapping); +#else +static inline bool shmem_mapping(struct address_space *mapping) +{ + return false; +} +#endif /* CONFIG_SHMEM */ extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -- cgit v1.2.3 From dc18d706a4367454ad1fc51e06148d54e8ecfaa0 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 24 Feb 2017 15:00:02 -0800 Subject: memory-hotplug: use dev_online for memhp_auto_online Commit 31bc3858ea3e ("add automatic onlining policy for the newly added memory") provides the capability to have added memory automatically onlined during add, but this appears to be slightly broken. The current implementation uses walk_memory_range() to call online_memory_block, which uses memory_block_change_state() to online the memory. Instead, we should be calling device_online() for the memory block in online_memory_block(). This would online the memory (the memory bus online routine memory_subsys_online() called from device_online calls memory_block_change_state()) and properly update the device struct offline flag. As a result of the current implementation, attempting to remove a memory block after adding it using auto online fails. This is because doing a remove, for instance echo offline > /sys/devices/system/memory/memoryXXX/state uses device_offline() which checks the dev->offline flag. Link: http://lkml.kernel.org/r/20170222220744.8119.19687.stgit@ltcalpine2-lp14.aus.stglabs.ibm.com Signed-off-by: Nathan Fontenot Cc: Michael Ellerman Cc: Michael Roth Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 093607f90b91..b723a686fc10 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -109,9 +109,6 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); extern int register_new_memory(int, struct mem_section *); -extern int memory_block_change_state(struct memory_block *mem, - unsigned long to_state, - unsigned long from_state_req); #ifdef CONFIG_MEMORY_HOTREMOVE extern int unregister_memory_section(struct mem_section *); #endif -- cgit v1.2.3 From f9fa1d919c696e90c887d8742198023e7639d139 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 24 Feb 2017 15:00:05 -0800 Subject: kasan: drain quarantine of memcg slab objects Per memcg slab accounting and kasan have a problem with kmem_cache destruction. - kmem_cache_create() allocates a kmem_cache, which is used for allocations from processes running in root (top) memcg. - Processes running in non root memcg and allocating with either __GFP_ACCOUNT or from a SLAB_ACCOUNT cache use a per memcg kmem_cache. - Kasan catches use-after-free by having kfree() and kmem_cache_free() defer freeing of objects. Objects are placed in a quarantine. - kmem_cache_destroy() destroys root and non root kmem_caches. It takes care to drain the quarantine of objects from the root memcg's kmem_cache, but ignores objects associated with non root memcg. This causes leaks because quarantined per memcg objects refer to per memcg kmem cache being destroyed. To see the problem: 1) create a slab cache with kmem_cache_create(,,,SLAB_ACCOUNT,) 2) from non root memcg, allocate and free a few objects from cache 3) dispose of the cache with kmem_cache_destroy() kmem_cache_destroy() will trigger a "Slab cache still has objects" warning indicating that the per memcg kmem_cache structure was leaked. Fix the leak by draining kasan quarantined objects allocated from non root memcg. Racing memcg deletion is tricky, but handled. kmem_cache_destroy() => shutdown_memcg_caches() => __shutdown_memcg_cache() => shutdown_cache() flushes per memcg quarantined objects, even if that memcg has been rmdir'd and gone through memcg_deactivate_kmem_caches(). This leak only affects destroyed SLAB_ACCOUNT kmem caches when kasan is enabled. So I don't think it's worth patching stable kernels. Link: http://lkml.kernel.org/r/1482257462-36948-1-git-send-email-gthelen@google.com Signed-off-by: Greg Thelen Reviewed-by: Vladimir Davydov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 820c0ad54a01..c908b25bf5a5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -52,7 +52,7 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags); void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_destroy(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); @@ -98,7 +98,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, unsigned long *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_destroy(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, -- cgit v1.2.3 From 796f571b0c5cf3efd2f652779770fa7bbbc2bb03 Mon Sep 17 00:00:00 2001 From: Lafcadio Wluiki Date: Fri, 24 Feb 2017 15:00:23 -0800 Subject: procfs: use an enum for possible hidepid values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the hidepid parameter was checked by comparing literal integers 0, 1, 2. Let's add a proper enum for this, to make the checking more expressive: 0 → HIDEPID_OFF 1 → HIDEPID_NO_ACCESS 2 → HIDEPID_INVISIBLE This changes the internal labelling only, the userspace-facing interface remains unmodified, and still works with literal integers 0, 1, 2. No functional changes. Link: http://lkml.kernel.org/r/1484572984-13388-2-git-send-email-djalal@gmail.com Signed-off-by: Lafcadio Wluiki Signed-off-by: Djalal Harouni Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 34cce96741bc..c2a989dee876 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,6 +21,12 @@ struct pidmap { struct fs_pin; +enum { /* definitions for pid_namespace's hide_pid field */ + HIDEPID_OFF = 0, + HIDEPID_NO_ACCESS = 1, + HIDEPID_INVISIBLE = 2, +}; + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; -- cgit v1.2.3 From 16f4e3195156f2f06e90d00a36bc48d7a513f253 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 24 Feb 2017 15:00:29 -0800 Subject: include/linux/iopoll.h: include instead of The timer APIs this header needs are ktime_get(), ktime_add_us(), and ktime_compare(). So, including seems enough. This commit will cut unnecessary header file parsing. Link: http://lkml.kernel.org/r/1481679225-10885-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iopoll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 1c30014ed176..d29e1e21bf3f 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From a3f0825e7e37d99a02a8a1b1599687667ee50d04 Mon Sep 17 00:00:00 2001 From: Gideon Israel Dsouza Date: Fri, 24 Feb 2017 15:00:32 -0800 Subject: compiler-gcc.h: add a new macro to wrap gcc attribute Add __mode(x) into compiler-gcc.h as part of a cleanup task I've taken up, to replace gcc specific attributes with macros. The next patch is a cleanup of the m68k subsystem and it requires a new macro to wrap __attribute__ ((mode (...))) Link: http://lkml.kernel.org/r/1485540901-1988-2-git-send-email-gidisrael@gmail.com Signed-off-by: Gideon Israel Dsouza Cc: Greg Ungerer Cc: Geert Uytterhoeven Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index fddd1a5eb322..811f7a915658 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -122,6 +122,7 @@ #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) +#define __mode(x) __attribute__((mode(x))) /* gcc version specific checks */ -- cgit v1.2.3 From 85caa95b9f19bb3a26d7e025d1134760b69e0c40 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 24 Feb 2017 15:00:38 -0800 Subject: bug: switch data corruption check to __must_check The CHECK_DATA_CORRUPTION() macro was designed to have callers do something meaningful/protective on failure. However, using "return false" in the macro too strictly limits the design patterns of callers. Instead, let callers handle the logic test directly, but make sure that the result IS checked by forcing __must_check (which appears to not be able to be used directly on macro expressions). Link: http://lkml.kernel.org/r/20170206204547.GA125312@beast Signed-off-by: Kees Cook Suggested-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index baff2e8fc8a8..5828489309bb 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -124,18 +124,20 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, /* * Since detected data corruption should stop operation on the affected - * structures, this returns false if the corruption condition is found. + * structures. Return value must be checked and sanely acted on by caller. */ +static inline __must_check bool check_data_corruption(bool v) { return v; } #define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ - do { \ - if (unlikely(condition)) { \ + check_data_corruption(({ \ + bool corruption = unlikely(condition); \ + if (corruption) { \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ pr_err(fmt, ##__VA_ARGS__); \ BUG(); \ } else \ WARN(1, fmt, ##__VA_ARGS__); \ - return false; \ } \ - } while (0) + corruption; \ + })) #endif /* _LINUX_BUG_H */ -- cgit v1.2.3 From 4f5901f5a6724f4ed1641e4a94978c5039a1f8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Fri, 24 Feb 2017 15:01:01 -0800 Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST to support negative divisors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While working on a thermal driver I encounter a scenario where the divisor could be negative, instead of adding local code to handle this I though I first try to add support for this in DIV_ROUND_CLOSEST. Add support to DIV_ROUND_CLOSEST for negative divisors if both dividend and divisor variable types are signed. This should not alter current behavior for users of the macro as previously negative divisors where not supported. Before: DIV_ROUND_CLOSEST( 59, 4) = 15 DIV_ROUND_CLOSEST( 59, -4) = -14 DIV_ROUND_CLOSEST( -59, 4) = -15 DIV_ROUND_CLOSEST( -59, -4) = 14 After: DIV_ROUND_CLOSEST( 59, 4) = 15 DIV_ROUND_CLOSEST( 59, -4) = -15 DIV_ROUND_CLOSEST( -59, 4) = -15 DIV_ROUND_CLOSEST( -59, -4) = 15 [akpm@linux-foundation.org: fix comment, per Guenter] Link: http://lkml.kernel.org/r/20161222102217.29011-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Niklas Söderlund Reviewed-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cb09238f6d32..4c26dc3a8295 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -100,16 +100,18 @@ ) /* - * Divide positive or negative dividend by positive divisor and round - * to closest integer. Result is undefined for negative divisors and - * for negative dividends if the divisor variable type is unsigned. + * Divide positive or negative dividend by positive or negative divisor + * and round to closest integer. Result is undefined for negative + * divisors if he dividend variable type is unsigned and for negative + * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ { \ typeof(x) __x = x; \ typeof(divisor) __d = divisor; \ (((typeof(x))-1) > 0 || \ - ((typeof(divisor))-1) > 0 || (__x) > 0) ? \ + ((typeof(divisor))-1) > 0 || \ + (((__x) > 0) == ((__d) > 0))) ? \ (((__x) + ((__d) / 2)) / (__d)) : \ (((__x) - ((__d) / 2)) / (__d)); \ } \ -- cgit v1.2.3 From f231aebfc4cae2f6ed27a46a31e2630909513d77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 24 Feb 2017 15:01:04 -0800 Subject: rbtree: use designated initializers Prepare to mark sensitive kernel structures for randomization by making sure they're using designated initializers. These were identified during allyesconfig builds of x86, arm, and arm64, with most initializer fixes extracted from grsecurity. Link: http://lkml.kernel.org/r/20161217010253.GA140470@beast Signed-off-by: Kees Cook Acked-by: Peter Zijlstra (Intel) Cc: David Howells Cc: Jie Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index d076183e49be..9702b6e183bc 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -90,7 +90,9 @@ rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ old->rbaugmented = rbcompute(old); \ } \ rbstatic const struct rb_augment_callbacks rbname = { \ - rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ + .propagate = rbname ## _propagate, \ + .copy = rbname ## _copy, \ + .rotate = rbname ## _rotate \ }; -- cgit v1.2.3 From 4e1a33b105ddf201f66dcc44490c6086a25eca0b Mon Sep 17 00:00:00 2001 From: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Date: Fri, 24 Feb 2017 15:01:12 -0800 Subject: lib: update LZ4 compressor module Patch series "Update LZ4 compressor module", v7. This patchset updates the LZ4 compression module to a version based on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast which provides an "acceleration" parameter as a tradeoff between high compression ratio and high compression speed. We want to use LZ4 fast in order to support compression in lustre and (mostly, based on that) investigate data reduction techniques in behalf of storage systems. Also, it will be useful for other users of LZ4 compression, as with LZ4 fast it is possible to enable applications to use fast and/or high compression depending on the usecase. For instance, ZRAM is offering a LZ4 backend and could benefit from an updated LZ4 in the kernel. LZ4 homepage: http://www.lz4.org/ LZ4 source repository: https://github.com/lz4/lz4 Source version: 1.7.3 Benchmark (taken from [1], Core i5-4300U @1.9GHz): ----------------|--------------|----------------|---------- Compressor | Compression | Decompression | Ratio ----------------|--------------|----------------|---------- memcpy | 4200 MB/s | 4200 MB/s | 1.000 LZ4 fast 50 | 1080 MB/s | 2650 MB/s | 1.375 LZ4 fast 17 | 680 MB/s | 2220 MB/s | 1.607 LZ4 fast 5 | 475 MB/s | 1920 MB/s | 1.886 LZ4 default | 385 MB/s | 1850 MB/s | 2.101 [1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html [PATCH 1/5] lib: Update LZ4 compressor module [PATCH 2/5] lib/decompress_unlz4: Change module to work with new LZ4 module version [PATCH 3/5] crypto: Change LZ4 modules to work with new LZ4 module version [PATCH 4/5] fs/pstore: fs/squashfs: Change usage of LZ4 to work with new LZ4 version [PATCH 5/5] lib/lz4: Remove back-compat wrappers This patch (of 5): Update the LZ4 kernel module to LZ4 v1.7.3 by Yann Collet. The kernel module is inspired by the previous work by Chanho Min. The updated LZ4 module will not break existing code since the patchset contains appropriate changes. API changes: New method LZ4_compress_fast which differs from the variant available in kernel by the new acceleration parameter, allowing to trade compression ratio for more compression speed and vice versa. LZ4_decompress_fast is the respective decompression method, featuring a very fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core systems. The decompressor allows to decompress data compressed with LZ4 fast as well as the LZ4 HC (high compression) algorithm. Also the useful functions LZ4_decompress_safe_partial and LZ4_compress_destsize were added. The latter reverses the logic by trying to compress as much data as possible from source to dest while the former aims to decompress partial blocks of data. A bunch of streaming functions were also added which allow compressig/decompressing data in multiple steps (so called "streaming mode"). The methods lz4_compress and lz4_decompress_unknownoutputsize are now known as LZ4_compress_default respectivley LZ4_decompress_safe. The old methods will be removed since there's no callers left in the code. [arnd@arndb.de: fix KERNEL_LZ4 support] Link: http://lkml.kernel.org/r/20170208211946.2839649-1-arnd@arndb.de [akpm@linux-foundation.org: simplify] [akpm@linux-foundation.org: fix the simplification] [4sschmid@informatik.uni-hamburg.de: fix performance regressions] Link: http://lkml.kernel.org/r/1486898178-17125-2-git-send-email-4sschmid@informatik.uni-hamburg.de [4sschmid@informatik.uni-hamburg.de: v8] Link: http://lkml.kernel.org/r/1487182598-15351-2-git-send-email-4sschmid@informatik.uni-hamburg.de Link: http://lkml.kernel.org/r/1486321748-19085-2-git-send-email-4sschmid@informatik.uni-hamburg.de Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Signed-off-by: Arnd Bergmann Cc: Bongkyu Kim Cc: Rui Salvaterra Cc: Sergey Senozhatsky Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: David S. Miller Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 762 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 696 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 6b784c59f321..1b0f8ca0f9c8 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -1,87 +1,717 @@ -#ifndef __LZ4_H__ -#define __LZ4_H__ -/* - * LZ4 Kernel Interface +/* LZ4 Kernel Interface * * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * This file is based on the original header file + * for LZ4 - Fast LZ compression algorithm. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2016, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * You can contact the author at : + * - LZ4 homepage : http://www.lz4.org + * - LZ4 source repository : https://github.com/lz4/lz4 */ -#define LZ4_MEM_COMPRESS (16384) -#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *))) +#ifndef __LZ4_H__ +#define __LZ4_H__ + +#include +#include /* memset, memcpy */ + +/*-************************************************************************ + * CONSTANTS + **************************************************************************/ /* - * lz4_compressbound() - * Provides the maximum size that LZ4 may output in a "worst case" scenario - * (input data not compressible) + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes + * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -static inline size_t lz4_compressbound(size_t isize) +#define LZ4_MEMORY_USAGE 14 + +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) (\ + (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \ + ? 0 \ + : (isize) + ((isize)/255) + 16) + +#define LZ4_ACCELERATION_DEFAULT 1 +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) + +#define LZ4HC_MIN_CLEVEL 3 +#define LZ4HC_DEFAULT_CLEVEL 9 +#define LZ4HC_MAX_CLEVEL 16 + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<= LZ4_compressBound(inputSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'source' into a more limited 'dest' budget, + * compression stops *immediately*, and the function result is zero. + * As a consequence, 'dest' content is not valid. + * + * Return: Number of bytes written into buffer 'dest' + * (necessarily <= maxOutputSize) or 0 if compression fails + */ +int LZ4_compress_default(const char *source, char *dest, int inputSize, + int maxOutputSize, void *wrkmem); + +/** + * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param + * @source: source address of the original data + * @dest: output buffer address of the compressed data + * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxOutputSize: full or partial size of buffer 'dest' + * which must be already allocated + * @acceleration: acceleration factor + * @wrkmem: address of the working memory. + * This requires 'workmem' of LZ4_MEM_COMPRESS. + * + * Same as LZ4_compress_default(), but allows to select an "acceleration" + * factor. The larger the acceleration value, the faster the algorithm, + * but also the lesser the compression. It's a trade-off. It can be fine tuned, + * with each successive value providing roughly +~3% to speed. + * An acceleration value of "1" is the same as regular LZ4_compress_default() + * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1. + * + * Return: Number of bytes written into buffer 'dest' + * (necessarily <= maxOutputSize) or 0 if compression fails + */ +int LZ4_compress_fast(const char *source, char *dest, int inputSize, + int maxOutputSize, int acceleration, void *wrkmem); + +/** + * LZ4_compress_destSize() - Compress as much data as possible + * from source to dest + * @source: source address of the original data + * @dest: output buffer address of the compressed data + * @sourceSizePtr: will be modified to indicate how many bytes where read + * from 'source' to fill 'dest'. New value is necessarily <= old value. + * @targetDestSize: Size of buffer 'dest' which must be already allocated + * @wrkmem: address of the working memory. + * This requires 'workmem' of LZ4_MEM_COMPRESS. + * + * Reverse the logic, by compressing as much data as possible + * from 'source' buffer into already allocated buffer 'dest' + * of size 'targetDestSize'. + * This function either compresses the entire 'source' content into 'dest' + * if it's large enough, or fill 'dest' buffer completely with as much data as + * possible from 'source'. + * + * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize) + * or 0 if compression fails + */ +int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, + int targetDestSize, void *wrkmem); /* - * lz4_decompress() - * src : source address of the compressed data - * src_len : is the input size, whcih is returned after decompress done - * dest : output buffer address of the decompressed data - * actual_dest_len: is the size of uncompressed data, supposing it's known - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. - * slightly faster than lz4_decompress_unknownoutputsize() + * lz4_compress() - For backward compatibility, see LZ4_compress_default + * @src: source address of the original data + * @src_len: size of the original data + * @dst: output buffer address of the compressed data. This requires 'dst' + * of size LZ4_COMPRESSBOUND + * @dst_len: is the output size, which is returned after compress done + * @workmem: address of the working memory. + * + * Return: Success if return 0, Error if return < 0 */ -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len); +int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, + size_t *dst_len, void *wrkmem); + +/*-************************************************************************ + * Decompression Functions + **************************************************************************/ + +/** + * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest' + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * + * Decompresses data from 'source' into 'dest'. + * This function fully respect memory boundaries for properly formed + * compressed data. + * It is a bit faster than LZ4_decompress_safe(). + * However, it does not provide any protection against intentionally + * modified data stream (malicious input). + * Use this function in trusted environment only + * (data to decode comes from a trusted source). + * + * Return: number of bytes read from the source buffer + * or a negative result if decompression fails. + */ +int LZ4_decompress_fast(const char *source, char *dest, int originalSize); + +/** + * LZ4_decompress_safe() - Decompression protected against buffer overflow + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * + * Decompresses data fom 'source' into 'dest'. + * If the source stream is detected malformed, the function will + * stop decoding and return a negative result. + * This function is protected against buffer overflow exploits, + * including malicious data packets. It never writes outside output buffer, + * nor reads outside input buffer. + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, + int maxDecompressedSize); + +/** + * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize' + * at position 'source' into buffer 'dest' + * @source: source address of the compressed data + * @dest: output buffer address of the decompressed data which must be + * already allocated + * @compressedSize: is the precise full size of the compressed block. + * @targetOutputSize: the decompression operation will try + * to stop as soon as 'targetOutputSize' has been reached + * @maxDecompressedSize: is the size of destination buffer + * + * This function decompresses a compressed block of size 'compressedSize' + * at position 'source' into destination buffer 'dest' + * of size 'maxDecompressedSize'. + * The function tries to stop decompressing operation as soon as + * 'targetOutputSize' has been reached, reducing decompression time. + * This function never writes outside of output buffer, + * and never reads outside of input buffer. + * It is therefore protected against malicious data packets. + * + * Return: the number of bytes decoded in the destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + * + */ +int LZ4_decompress_safe_partial(const char *source, char *dest, + int compressedSize, int targetOutputSize, int maxDecompressedSize); /* - * lz4_decompress_unknownoutputsize() - * src : source address of the compressed data - * src_len : is the input size, therefore the compressed size - * dest : output buffer address of the decompressed data - * dest_len: is the max size of the destination buffer, which is - * returned with actual size of decompressed data after - * decompress done - * return : Success if return 0 - * Error if return (< 0) - * note : Destination buffer must be already allocated. + * lz4_decompress_unknownoutputsize() - For backwards compatibility, + * see LZ4_decompress_safe + * @src: source address of the compressed data + * @src_len: is the input size, therefore the compressed size + * @dest: output buffer address of the decompressed data + * which must be already allocated + * @dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after decompress done + * + * Return: Success if return 0, Error if return (< 0) */ int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len); + unsigned char *dest, size_t *dest_len); + +/** + * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast + * @src: source address of the compressed data + * @src_len: is the input size, which is returned after decompress done + * @dest: output buffer address of the decompressed data, + * which must be already allocated + * @actual_dest_len: is the size of uncompressed data, supposing it's known + * + * Return: Success if return 0, Error if return (< 0) + */ +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); + +/*-************************************************************************ + * LZ4 HC Compression + **************************************************************************/ + +/** + * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm + * @src: source address of the original data + * @dst: output buffer address of the compressed data + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @dstCapacity: full or partial size of buffer 'dst', + * which must be already allocated + * @compressionLevel: Recommended values are between 4 and 9, although any + * value between 1 and LZ4HC_MAX_CLEVEL will work. + * Values >LZ4HC_MAX_CLEVEL behave the same as 16. + * @wrkmem: address of the working memory. + * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS. + * + * Compress data from 'src' into 'dst', using the more powerful + * but slower "HC" algorithm. Compression is guaranteed to succeed if + * `dstCapacity >= LZ4_compressBound(srcSize) + * + * Return : the number of bytes written into 'dst' or 0 if compression fails. + */ +int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, + int compressionLevel, void *wrkmem); + +/** + * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC + * @src: source address of the original data + * @src_len: size of the original data + * @dst: output buffer address of the compressed data. This requires 'dst' + * of size LZ4_COMPRESSBOUND. + * @dst_len: is the output size, which is returned after compress done + * @wrkmem: address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * + * Return : Success if return 0, Error if return (< 0) + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, + size_t *dst_len, void *wrkmem); + +/** + * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure + * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure + * @compressionLevel: Recommended values are between 4 and 9, although any + * value between 1 and LZ4HC_MAX_CLEVEL will work. + * Values >LZ4HC_MAX_CLEVEL behave the same as 16. + * + * An LZ4_streamHC_t structure can be allocated once + * and re-used multiple times. + * Use this function to init an allocated `LZ4_streamHC_t` structure + * and start a new compression. + */ +void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel); + +/** + * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC + * @streamHCPtr: pointer to the LZ4HC_stream_t + * @dictionary: dictionary to load + * @dictSize: size of dictionary + * + * Use this function to load a static dictionary into LZ4HC_stream. + * Any previous data will be forgotten, only 'dictionary' + * will remain in memory. + * Loading a size of 0 is allowed. + * + * Return : dictionary size, in bytes (necessarily <= 64 KB) + */ +int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary, + int dictSize); + +/** + * LZ4_compress_HC_continue() - Compress 'src' using data from previously + * compressed blocks as a dictionary using the HC algorithm + * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure + * @src: source address of the original data + * @dst: output buffer address of the compressed data, + * which must be already allocated + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxDstSize: full or partial size of buffer 'dest' + * which must be already allocated + * + * These functions compress data in successive blocks of any size, using + * previous blocks as dictionary. One key assumption is that previous + * blocks (up to 64 KB) remain read-accessible while + * compressing next blocks. There is an exception for ring buffers, + * which can be smaller than 64 KB. + * Ring buffers scenario is automatically detected and handled by + * LZ4_compress_HC_continue(). + * Before starting compression, state must be properly initialized, + * using LZ4_resetStreamHC(). + * A first "fictional block" can then be designated as + * initial dictionary, using LZ4_loadDictHC() (Optional). + * Then, use LZ4_compress_HC_continue() + * to compress each successive block. Previous memory blocks + * (including initial dictionary when present) must remain accessible + * and unmodified during compression. + * 'dst' buffer should be sized to handle worst case scenarios, using + * LZ4_compressBound(), to ensure operation success. + * If, for any reason, previous data blocks can't be preserved unmodified + * in memory during next compression block, + * you must save it to a safer memory space, using LZ4_saveDictHC(). + * Return value of LZ4_saveDictHC() is the size of dictionary + * effectively saved into 'safeBuffer'. + * + * Return: Number of bytes written into buffer 'dst' or 0 if compression fails + */ +int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src, + char *dst, int srcSize, int maxDstSize); + +/** + * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream + * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure + * @safeBuffer: buffer to save dictionary to, must be already allocated + * @maxDictSize: size of 'safeBuffer' + * + * If previously compressed data block is not guaranteed + * to remain available at its memory location, + * save it into a safer place (char *safeBuffer). + * Note : you don't need to call LZ4_loadDictHC() afterwards, + * dictionary is immediately usable, you can therefore call + * LZ4_compress_HC_continue(). + * + * Return : saved dictionary size in bytes (necessarily <= maxDictSize), + * or 0 if error. + */ +int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer, + int maxDictSize); + +/*-********************************************* + * Streaming Compression Functions + ***********************************************/ + +/** + * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure + * @LZ4_stream: pointer to the 'LZ4_stream_t' structure + * + * An LZ4_stream_t structure can be allocated once + * and re-used multiple times. + * Use this function to init an allocated `LZ4_stream_t` structure + * and start a new compression. + */ +void LZ4_resetStream(LZ4_stream_t *LZ4_stream); + +/** + * LZ4_loadDict() - Load a static dictionary into LZ4_stream + * @streamPtr: pointer to the LZ4_stream_t + * @dictionary: dictionary to load + * @dictSize: size of dictionary + * + * Use this function to load a static dictionary into LZ4_stream. + * Any previous data will be forgotten, only 'dictionary' + * will remain in memory. + * Loading a size of 0 is allowed. + * + * Return : dictionary size, in bytes (necessarily <= 64 KB) + */ +int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary, + int dictSize); + +/** + * LZ4_saveDict() - Save static dictionary from LZ4_stream + * @streamPtr: pointer to the 'LZ4_stream_t' structure + * @safeBuffer: buffer to save dictionary to, must be already allocated + * @dictSize: size of 'safeBuffer' + * + * If previously compressed data block is not guaranteed + * to remain available at its memory location, + * save it into a safer place (char *safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call + * LZ4_compress_fast_continue(). + * + * Return : saved dictionary size in bytes (necessarily <= dictSize), + * or 0 if error. + */ +int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize); + +/** + * LZ4_compress_fast_continue() - Compress 'src' using data from previously + * compressed blocks as a dictionary + * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure + * @src: source address of the original data + * @dst: output buffer address of the compressed data, + * which must be already allocated + * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE + * @maxDstSize: full or partial size of buffer 'dest' + * which must be already allocated + * @acceleration: acceleration factor + * + * Compress buffer content 'src', using data from previously compressed blocks + * as dictionary to improve compression ratio. + * Important : Previous data blocks are assumed to still + * be present and unmodified ! + * If maxDstSize >= LZ4_compressBound(srcSize), + * compression is guaranteed to succeed, and runs faster. + * + * Return: Number of bytes written into buffer 'dst' or 0 if compression fails + */ +int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src, + char *dst, int srcSize, int maxDstSize, int acceleration); + +/** + * LZ4_setStreamDecode() - Instruct where to find dictionary + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @dictionary: dictionary to use + * @dictSize: size of dictionary + * + * Use this function to instruct where to find the dictionary. + * Setting a size of 0 is allowed (same effect as reset). + * + * Return: 1 if OK, 0 if error + */ +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize); + +/** + * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * + * These decoding function allows decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position + * where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule + * (block boundaries at same positions) In which case, + * the decoding & encoding ring buffer can have any size, + * including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. + * It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need + * to be synchronized, and encoding ring buffer can have any size, + * including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be + * synchronized, and encoding ring buffer can have any size, + * including larger than decoding buffer. W + * Whenever these conditions are not possible, save the last 64KB of decoded + * data into a safe buffer, and indicate where it is saved + * using LZ4_setStreamDecode() + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int compressedSize, + int maxDecompressedSize); + +/** + * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode + * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * + * These decoding function allows decompression of multiple blocks + * in "streaming" mode. + * Previously decoded blocks *must* remain available at the memory position + * where they were decoded (up to 64 KB) + * In the case of a ring buffers, decoding buffer must be either : + * - Exactly same size as encoding buffer, with same update rule + * (block boundaries at same positions) In which case, + * the decoding & encoding ring buffer can have any size, + * including very small ones ( < 64 KB). + * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * maxBlockSize is implementation dependent. + * It's the maximum size you intend to compress into a single block. + * In which case, encoding and decoding buffers do not need + * to be synchronized, and encoding ring buffer can have any size, + * including small ones ( < 64 KB). + * - _At least_ 64 KB + 8 bytes + maxBlockSize. + * In which case, encoding and decoding buffers do not need to be + * synchronized, and encoding ring buffer can have any size, + * including larger than decoding buffer. W + * Whenever these conditions are not possible, save the last 64KB of decoded + * data into a safe buffer, and indicate where it is saved + * using LZ4_setStreamDecode() + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize); + +/** + * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode() + * followed by LZ4_decompress_safe_continue() + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated + * @compressedSize: is the precise full size of the compressed block + * @maxDecompressedSize: is the size of 'dest' buffer + * @dictStart: pointer to the start of the dictionary in memory + * @dictSize: size of dictionary + * + * These decoding function works the same as + * a combination of LZ4_setStreamDecode() followed by + * LZ4_decompress_safe_continue() + * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxDecompressedSize, const char *dictStart, + int dictSize); + +/** + * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode() + * followed by LZ4_decompress_fast_continue() + * @source: source address of the compressed data + * @dest: output buffer address of the uncompressed data + * which must be already allocated with 'originalSize' bytes + * @originalSize: is the original and therefore uncompressed size + * @dictStart: pointer to the start of the dictionary in memory + * @dictSize: size of dictionary + * + * These decoding function works the same as + * a combination of LZ4_setStreamDecode() followed by + * LZ4_decompress_safe_continue() + * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. + * + * Return: number of bytes decompressed into destination buffer + * (necessarily <= maxDecompressedSize) + * or a negative result in case of error + */ +int LZ4_decompress_fast_usingDict(const char *source, char *dest, + int originalSize, const char *dictStart, int dictSize); + #endif -- cgit v1.2.3 From 69c78423b8f439b077929410bdf8f88e7031b891 Mon Sep 17 00:00:00 2001 From: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Date: Fri, 24 Feb 2017 15:01:25 -0800 Subject: lib/lz4: remove back-compat wrappers Remove the functions introduced as wrappers for providing backwards compatibility to the prior LZ4 version. They're not needed anymore since there's no callers left. Link: http://lkml.kernel.org/r/1486321748-19085-6-git-send-email-4sschmid@informatik.uni-hamburg.de Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Bongkyu Kim Cc: Rui Salvaterra Cc: Sergey Senozhatsky Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: David S. Miller Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 69 ----------------------------------------------------- 1 file changed, 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 1b0f8ca0f9c8..394e3d9213b8 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -172,18 +172,6 @@ static inline int LZ4_compressBound(size_t isize) return LZ4_COMPRESSBOUND(isize); } -/** - * lz4_compressbound() - For backwards compatibility; see LZ4_compressBound - * @isize: Size of the input data - * - * Return: Max. size LZ4 may output in a "worst case" szenario - * (data not compressible) - */ -static inline int lz4_compressbound(size_t isize) -{ - return LZ4_COMPRESSBOUND(isize); -} - /** * LZ4_compress_default() - Compress data from source to dest * @source: source address of the original data @@ -257,20 +245,6 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize, int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, int targetDestSize, void *wrkmem); -/* - * lz4_compress() - For backward compatibility, see LZ4_compress_default - * @src: source address of the original data - * @src_len: size of the original data - * @dst: output buffer address of the compressed data. This requires 'dst' - * of size LZ4_COMPRESSBOUND - * @dst_len: is the output size, which is returned after compress done - * @workmem: address of the working memory. - * - * Return: Success if return 0, Error if return < 0 - */ -int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst, - size_t *dst_len, void *wrkmem); - /*-************************************************************************ * Decompression Functions **************************************************************************/ @@ -346,34 +320,6 @@ int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int LZ4_decompress_safe_partial(const char *source, char *dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); -/* - * lz4_decompress_unknownoutputsize() - For backwards compatibility, - * see LZ4_decompress_safe - * @src: source address of the compressed data - * @src_len: is the input size, therefore the compressed size - * @dest: output buffer address of the decompressed data - * which must be already allocated - * @dest_len: is the max size of the destination buffer, which is - * returned with actual size of decompressed data after decompress done - * - * Return: Success if return 0, Error if return (< 0) - */ -int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, - unsigned char *dest, size_t *dest_len); - -/** - * lz4_decompress() - For backwards cocmpatibility, see LZ4_decompress_fast - * @src: source address of the compressed data - * @src_len: is the input size, which is returned after decompress done - * @dest: output buffer address of the decompressed data, - * which must be already allocated - * @actual_dest_len: is the size of uncompressed data, supposing it's known - * - * Return: Success if return 0, Error if return (< 0) - */ -int lz4_decompress(const unsigned char *src, size_t *src_len, - unsigned char *dest, size_t actual_dest_len); - /*-************************************************************************ * LZ4 HC Compression **************************************************************************/ @@ -400,21 +346,6 @@ int lz4_decompress(const unsigned char *src, size_t *src_len, int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem); -/** - * lz4hc_compress() - For backwards compatibility, see LZ4_compress_HC - * @src: source address of the original data - * @src_len: size of the original data - * @dst: output buffer address of the compressed data. This requires 'dst' - * of size LZ4_COMPRESSBOUND. - * @dst_len: is the output size, which is returned after compress done - * @wrkmem: address of the working memory. - * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. - * - * Return : Success if return 0, Error if return (< 0) - */ -int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst, - size_t *dst_len, void *wrkmem); - /** * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure -- cgit v1.2.3 From 3d1e236022cc1426b0834565995ddee2ca231cee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Feb 2017 22:31:02 -0600 Subject: objtool: Prevent GCC from merging annotate_unreachable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0-day bot reported some new objtool warnings which were caused by the new annotate_unreachable() macro: fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: duplicate frame pointer save fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: frame pointer state mismatch fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: duplicate frame pointer save fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: frame pointer state mismatch fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: duplicate frame pointer save fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: frame pointer state mismatch fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: duplicate frame pointer save fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: frame pointer state mismatch It turns out that, for older versions of GCC, if a function has multiple BUG() incantations, GCC will sometimes merge the corresponding annotate_unreachable() inline asm statements into a single block. That has the undesirable effect of removing one of the entries in the __unreachable section, confusing objtool greatly. A workaround for this issue is to ensure that each instance of the inline asm statement uses a different label, so that GCC sees the statements are unique and leaves them alone. The inline asm ‘%=’ token could be used for that, but unfortunately older versions of GCC don't support it. So I implemented a poor man's version of it with the __LINE__ macro. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/0c14b00baf9f68d1b0221ddb6c88b925181c8be8.1487997036.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8ea159fc489d..de471346b365 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -197,10 +197,10 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ - asm("1:\t\n" \ + asm("%c0:\t\n" \ ".pushsection __unreachable, \"a\"\t\n" \ - ".long 1b\t\n" \ - ".popsection\t\n"); \ + ".long %c0b\t\n" \ + ".popsection\t\n" : : "i" (__LINE__)); \ }) #else #define annotate_unreachable() -- cgit v1.2.3 From 6dbf5cea05a7098a69f294c96b6d76f08562cae5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 13:25:14 +0100 Subject: cpuidle: menu: Avoid taking spinlock for accessing QoS values After commit 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) the cpuidle menu governor calls dev_pm_qos_read_value() on CPU devices to read the current resume latency QoS constraint values for them. That function takes a spinlock to prevent the device's power.qos pointer from becoming NULL during the access which is a problem for the RT patchset where spinlocks are converted into mutexes and the idle loop stops working. However, it is not even necessary for the menu governor to take that spinlock, because the power.qos pointer accessed under it cannot be modified during the access anyway. For this reason, introduce a "raw" routine for accessing device QoS resume latency constraints without locking and use it in the menu governor. Fixes: 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) Acked-by: Alex Shi Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 0f65d36c2a75..5aba9f47899d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -173,6 +173,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } + +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); +} #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) @@ -237,6 +243,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } #endif #endif -- cgit v1.2.3 From 51be7a9a261ce18c520fb3928b168feb77522745 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:36:32 +0200 Subject: virtio_mmio: expose header to userspace It's handy for userspace emulators like QEMU. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_mmio.h | 141 -------------------------------------------- 1 file changed, 141 deletions(-) delete mode 100644 include/linux/virtio_mmio.h (limited to 'include/linux') diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h deleted file mode 100644 index c4b09689ab64..000000000000 --- a/include/linux/virtio_mmio.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Virtio platform device driver - * - * Copyright 2011, ARM Ltd. - * - * Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007 - * - * This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _LINUX_VIRTIO_MMIO_H -#define _LINUX_VIRTIO_MMIO_H - -/* - * Control registers - */ - -/* Magic value ("virt" string) - Read Only */ -#define VIRTIO_MMIO_MAGIC_VALUE 0x000 - -/* Virtio device version - Read Only */ -#define VIRTIO_MMIO_VERSION 0x004 - -/* Virtio device ID - Read Only */ -#define VIRTIO_MMIO_DEVICE_ID 0x008 - -/* Virtio vendor ID - Read Only */ -#define VIRTIO_MMIO_VENDOR_ID 0x00c - -/* Bitmask of the features supported by the device (host) - * (32 bits per set) - Read Only */ -#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 - -/* Device (host) features set selector - Write Only */ -#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 - -/* Bitmask of features activated by the driver (guest) - * (32 bits per set) - Write Only */ -#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 - -/* Activated features set selector - Write Only */ -#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 - - -#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ - -/* Guest's memory page size in bytes - Write Only */ -#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 - -#endif - - -/* Queue selector - Write Only */ -#define VIRTIO_MMIO_QUEUE_SEL 0x030 - -/* Maximum size of the currently selected queue - Read Only */ -#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 - -/* Queue size for the currently selected queue - Write Only */ -#define VIRTIO_MMIO_QUEUE_NUM 0x038 - - -#ifndef VIRTIO_MMIO_NO_LEGACY /* LEGACY DEVICES ONLY! */ - -/* Used Ring alignment for the currently selected queue - Write Only */ -#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c - -/* Guest's PFN for the currently selected queue - Read Write */ -#define VIRTIO_MMIO_QUEUE_PFN 0x040 - -#endif - - -/* Ready bit for the currently selected queue - Read Write */ -#define VIRTIO_MMIO_QUEUE_READY 0x044 - -/* Queue notifier - Write Only */ -#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 - -/* Interrupt status - Read Only */ -#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 - -/* Interrupt acknowledge - Write Only */ -#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 - -/* Device status register - Read Write */ -#define VIRTIO_MMIO_STATUS 0x070 - -/* Selected queue's Descriptor Table address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 -#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 - -/* Selected queue's Available Ring address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 -#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 - -/* Selected queue's Used Ring address, 64 bits in two halves */ -#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 -#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 - -/* Configuration atomicity value */ -#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc - -/* The config space is defined by each driver as - * the per-driver configuration space - Read Write */ -#define VIRTIO_MMIO_CONFIG 0x100 - - - -/* - * Interrupt flags (re: interrupt status & acknowledge registers) - */ - -#define VIRTIO_MMIO_INT_VRING (1 << 0) -#define VIRTIO_MMIO_INT_CONFIG (1 << 1) - -#endif -- cgit v1.2.3 From 22ad0b6ab46683975c6da032f1c2593066c7b3bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Feb 2017 10:38:09 -0800 Subject: f2fs: add bitmaps for empty or full NAT blocks This patches adds bitmaps to represent empty or full NAT blocks containing free nid entries. If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll use these bitmaps when building free nids. In order to avoid checkpointing burden, up-to-date bitmaps will be flushed only during umount time. So, normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs which recovers this bitmap again. After this patch, we build free nids from nid #0 at mount time to make more full NAT blocks, but in runtime, we check empty NAT blocks to load free nids without loading any NAT pages from disk. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f0748524ca8c..1c92ace2e8f8 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -114,6 +114,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 -- cgit v1.2.3 From 4ac912427c4214d8031d9ad6fbc3bc75e71512df Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Feb 2017 10:53:49 +0800 Subject: f2fs: introduce free nid bitmap In scenario of intensively node allocation, free nids will be ran out soon, then it needs to stop to load free nids by traversing NAT blocks, in worse case, if NAT blocks does not be cached in memory, it generates IOs which slows down our foreground operations. In order to speed up node allocation, in this patch we introduce a new free_nid_bitmap array, so there is an bitmap table for each NAT block, Once the NAT block is loaded, related bitmap cache will be switched on, and bitmap will be set during traversing nat entries in NAT block, later we can query and update nid usage status in memory completely. With such implementation, I expect performance of node allocation can be improved in the long-term after filesystem image is mounted. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1c92ace2e8f8..e2d239ed4c60 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -279,6 +279,7 @@ struct f2fs_node { * For NAT entries */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ -- cgit v1.2.3 From fb5e31d970ce8b4941f03ed765d7dbefc39f22d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:22 +0100 Subject: virtio: allow drivers to request IRQ affinity when creating VQs Add a struct irq_affinity pointer to the find_vqs methods, which if set is used to tell the PCI layer to create the MSI-X vectors for our I/O virtqueues with the proper affinity from the start. Compared to after the fact affinity hints this gives us an instantly working setup and allows to allocate the irq descritors node-local and avoid interconnect traffic. Last but not least this will allow blk-mq queues are created based on the interrupt affinity for storage drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..2ebe506fe41a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include #include +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -68,9 +70,8 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; -- cgit v1.2.3 From bbaba479563910aaa51e59bb9027a09e396d3a3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:23 +0100 Subject: virtio: provide a method to get the IRQ affinity mask for a virtqueue This basically passed up the pci_irq_get_affinity information through virtio through an optional get_vq_affinity method. It is only implemented by the PCI backend for now, and only when we use per-virtqueue IRQs. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2ebe506fe41a..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -58,6 +58,7 @@ struct irq_affinity; * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -77,6 +78,8 @@ struct virtio_config_ops { int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3 From 73473427bb551686e4b68ecd99bfd27e6635286a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:24 +0100 Subject: blk-mq: provide a default queue mapping for virtio device Similar to the PCI version, just calling into virtio instead. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- include/linux/blk-mq-virtio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/blk-mq-virtio.h (limited to 'include/linux') diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ -- cgit v1.2.3 From 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:26 +0100 Subject: virtio_scsi: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 921acaaa1601..01aea80a503e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, -- cgit v1.2.3 From 7d134b2ce639448199052fd573a324f7e7cd5ed8 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 27 Feb 2017 14:26:56 -0800 Subject: kprobes: move kprobe declarations to asm-generic/kprobes.h Often all is needed is these small helpers, instead of compiler.h or a full kprobes.h. This is important for asm helpers, in fact even some asm/kprobes.h make use of these helpers... instead just keep a generic asm file with helpers useful for asm code with the least amount of clutter as possible. Likewise we need now to also address what to do about this file for both when architectures have CONFIG_HAVE_KPROBES, and when they do not. Then for when architectures have CONFIG_HAVE_KPROBES but have disabled CONFIG_KPROBES. Right now most asm/kprobes.h do not have guards against CONFIG_KPROBES, this means most architecture code cannot include asm/kprobes.h safely. Correct this and add guards for architectures missing them. Additionally provide architectures that not have kprobes support with the default asm-generic solution. This lets us force asm/kprobes.h on the header include/linux/kprobes.h always, but most importantly we can now safely include just asm/kprobes.h on architecture code without bringing the full kitchen sink of header files. Two architectures already provided a guard against CONFIG_KPROBES on its kprobes.h: sh, arch. The rest of the architectures needed gaurds added. We avoid including any not-needed headers on asm/kprobes.h unless kprobes have been enabled. In a subsequent atomic change we can try now to remove compiler.h from include/linux/kprobes.h. During this sweep I've also identified a few architectures defining a common macro needed for both kprobes and ftrace, that of the definition of the breakput instruction up. Some refer to this as BREAKPOINT_INSTRUCTION. This must be kept outside of the #ifdef CONFIG_KPROBES guard. [mcgrof@kernel.org: fix arm64 build] Link: http://lkml.kernel.org/r/CAB=NE6X1WMByuARS4mZ1g9+W=LuVBnMDnh_5zyN0CLADaVh=Jw@mail.gmail.com [sfr@canb.auug.org.au: fixup for kprobes declarations moving] Link: http://lkml.kernel.org/r/20170214165933.13ebd4f4@canb.auug.org.au Link: http://lkml.kernel.org/r/20170203233139.32682-1-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Stephen Rothwell Acked-by: Masami Hiramatsu Cc: Arnd Bergmann Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 8 -------- include/linux/kprobes.h | 19 +++---------------- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 91c30cba984e..b2eb9c0a68c4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -570,12 +570,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (_________p1); \ }) -/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ -#ifdef CONFIG_KPROBES -# define __kprobes __attribute__((__section__(".kprobes.text"))) -# define nokprobe_inline __always_inline -#else -# define __kprobes -# define nokprobe_inline inline -#endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 16ddfb8b304a..c328e4f7dcad 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -29,7 +29,7 @@ * and Prasanna S Panchamukhi * added function-return probes. */ -#include /* for __kprobes */ +#include #include #include #include @@ -40,9 +40,9 @@ #include #include #include +#include #ifdef CONFIG_KPROBES -#include /* kprobe_status settings */ #define KPROBE_HIT_ACTIVE 0x00000001 @@ -51,6 +51,7 @@ #define KPROBE_HIT_SSDONE 0x00000008 #else /* CONFIG_KPROBES */ +#include typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; @@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) } #endif -#ifdef CONFIG_KPROBES -/* - * Blacklist ganerating macro. Specify functions which is not probed - * by using this macro. - */ -#define __NOKPROBE_SYMBOL(fname) \ -static unsigned long __used \ - __attribute__((section("_kprobe_blacklist"))) \ - _kbl_addr_##fname = (unsigned long)fname; -#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) -#else -#define NOKPROBE_SYMBOL(fname) -#endif - #endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From 441398d378f29a5ad6d0fcda07918e54e4961800 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 27 Feb 2017 14:27:25 -0800 Subject: sigaltstack: support SS_AUTODISARM for CONFIG_COMPAT Currently SS_AUTODISARM is not supported in compatibility mode, but does not return -EINVAL either. This makes dosemu built with -m32 on x86_64 to crash. Also the kernel's sigaltstack selftest fails if compiled with -m32. This patch adds the needed support. Link: http://lkml.kernel.org/r/20170205101213.8163-2-stsp@list.ru Signed-off-by: Stas Sergeev Cc: Milosz Tanski Cc: Andy Lutomirski Cc: Al Viro Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Nicolas Pitre Cc: Waiman Long Cc: Dave Hansen Cc: Dmitry Safonov Cc: Wang Xiaoqiang Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 9e40be522793..aef47be2a5c1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -711,8 +711,10 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long); compat_stack_t __user *__uss = uss; \ struct task_struct *t = current; \ put_user_ex(ptr_to_compat((void __user *)t->sas_ss_sp), &__uss->ss_sp); \ - put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \ + put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \ put_user_ex(t->sas_ss_size, &__uss->ss_size); \ + if (t->sas_ss_flags & SS_AUTODISARM) \ + sas_ss_reset(t); \ } while (0); asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, -- cgit v1.2.3 From e3b5a342ab9643b4079c8d417d90519388469341 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 27 Feb 2017 14:27:37 -0800 Subject: include/linux/pid.h: use for_each_thread() in do_each_pid_thread() while_each_pid_thread() is using while_each_thread(), which is unsafe under RCU lock according to commit 0c740d0afc3b ("introduce for_each_thread() to replace the buggy while_each_thread()"). Use for_each_thread() in do_each_pid_thread() which is safe under RCU lock. Link: http://lkml.kernel.org/r/201702011947.DBD56740.OMVHOLOtSJFFFQ@I-love.SAKURA.ne.jp Link: http://lkml.kernel.org/r/1486041779-4401-2-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 23705a53abba..298ead5512e5 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -191,10 +191,10 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_thread(pid, type, task) \ do_each_pid_task(pid, type, task) { \ struct task_struct *tg___ = task; \ - do { + for_each_thread(tg___, task) { #define while_each_pid_thread(pid, type, task) \ - } while_each_thread(tg___, task); \ + } \ task = tg___; \ } while_each_pid_task(pid, type, task) #endif /* _LINUX_PID_H */ -- cgit v1.2.3 From 9de5ab8a2eeea9ae4b63b6f6353b415b93e020c0 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Mon, 27 Feb 2017 14:28:18 -0800 Subject: ipc/sem: add hysteresis sysv sem has two lock modes: One with per-semaphore locks, one lock mode with a single global lock for the whole array. When switching from the per-semaphore locks to the global lock, all per-semaphore locks must be scanned for ongoing operations. The patch adds a hysteresis for switching from the global lock to the per semaphore locks. This reduces how often the per-semaphore locks must be scanned. Compared to the initial patch, this is a simplified solution: Setting USE_GLOBAL_LOCK_HYSTERESIS to 1 restores the current behavior. In theory, a workload with exactly 10 simple sops and then one complex op now scales a bit worse, but this is pure theory: If there is concurrency, the it won't be exactly 10:1:10:1:10:1:... If there is no concurrency, then there is no need for scalability. Link: http://lkml.kernel.org/r/1476851896-3590-3-git-send-email-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: Peter Zijlstra Cc: Davidlohr Bueso Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: <1vier1@web.de> Cc: kernel test robot Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sem.h b/include/linux/sem.h index d0efd6e6c20a..4fc222f8755d 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,7 +21,7 @@ struct sem_array { struct list_head list_id; /* undo requests on this array */ int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ - bool complex_mode; /* no parallel simple ops */ + unsigned int use_global_lock;/* >0: global lock required */ }; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From 93407472a21b82f39c955ea7787e5bc7da100642 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 27 Feb 2017 14:28:32 -0800 Subject: fs: add i_blocksize() Replace all 1 << inode->i_blkbits and (1 << inode->i_blkbits) in fs branch. This patch also fixes multiple checkpatch warnings: WARNING: Prefer 'unsigned int' to bare use of 'unsigned' Thanks to Andrew Morton for suggesting more appropriate function instead of macro. [geliangtang@gmail.com: truncate: use i_blocksize()] Link: http://lkml.kernel.org/r/9c8b2cd83c8f5653805d43debde9fa8817e02fc4.1484895804.git.geliangtang@gmail.com Link: http://lkml.kernel.org/r/1481319905-10126-1-git-send-email-fabf@skynet.be Signed-off-by: Fabian Frederick Signed-off-by: Geliang Tang Cc: Alexander Viro Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c930cbc19342..c64f2cb7d364 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -655,6 +655,11 @@ struct inode { void *i_private; /* fs or device private pointer */ }; +static inline unsigned int i_blocksize(const struct inode *node) +{ + return (1 << node->i_blkbits); +} + static inline int inode_unhashed(struct inode *inode) { return hlist_unhashed(&inode->i_hash); -- cgit v1.2.3 From 03440c4e5e2f167764997a7e0f2dbb279d8078e6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Feb 2017 14:28:49 -0800 Subject: scripts/spelling.txt: add "an union" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: an union||a union Link: http://lkml.kernel.org/r/1481573103-11329-5-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c965e4469499..591b6c16f9c1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -562,7 +562,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * @inode: inode to select the dentry from multiple layers (can be NULL) * @flags: open flags to control copy-up behavior * - * If dentry is on an union/overlay, then return the underlying, real dentry. + * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.txt @@ -581,7 +581,7 @@ static inline struct dentry *d_real(struct dentry *dentry, * d_real_inode - Return the real inode * @dentry: The dentry to query * - * If dentry is on an union/overlay, then return the underlying, real inode. + * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) -- cgit v1.2.3 From 8ab102d60a0c19df602f3758848d55f0703bf9bb Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Feb 2017 14:28:55 -0800 Subject: scripts/spelling.txt: add "partiton" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: partiton||partition Link: http://lkml.kernel.org/r/1481573103-11329-7-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mtd/qinfo.h | 2 +- include/linux/spi/flash.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index 7b3d487d8b3f..b532ce524dae 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -14,7 +14,7 @@ * @DevId - Chip Device ID * @qinfo - pointer to qinfo records describing the chip * @numchips - number of chips including virual RWW partitions - * @chipshift - Chip/partiton size 2^chipshift + * @chipshift - Chip/partition size 2^chipshift * @chips - per-chip data structure */ struct lpddr_private { diff --git a/include/linux/spi/flash.h b/include/linux/spi/flash.h index 3f22932e67a4..f4199e758f97 100644 --- a/include/linux/spi/flash.h +++ b/include/linux/spi/flash.h @@ -7,7 +7,7 @@ struct mtd_partition; * struct flash_platform_data: board-specific flash data * @name: optional flash device name (eg, as used with mtdparts=) * @parts: optional array of mtd_partitions for static partitioning - * @nr_parts: number of mtd_partitions for static partitoning + * @nr_parts: number of mtd_partitions for static partitioning * @type: optional flash device type (e.g. m25p80 vs m25p64), for use * with chips that can't be queried for JEDEC or other IDs * -- cgit v1.2.3 From 4091fb95b5f8dea37568d1a94c8227244bade891 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Feb 2017 14:29:56 -0800 Subject: scripts/spelling.txt: add "followings" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: followings||following While we are here, add a missing colon in the boilerplate in DT binding documents. The "you SoC" in allwinner,sunxi-pinctrl.txt was fixed as well. I reworded "as the followings:" to "as follows:" for drivers/usb/gadget/udc/renesas_usb3.c. Link: http://lkml.kernel.org/r/1481573103-11329-32-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kconfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index 8f2e059e4d45..4d748603e818 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -8,7 +8,7 @@ /* * The use of "&&" / "||" is limited in certain expressions. - * The followings enable to calculate "and" / "or" with macro expansion only. + * The following enable to calculate "and" / "or" with macro expansion only. */ #define __and(x, y) ___and(x, y) #define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y) -- cgit v1.2.3 From f1f1007644ffc8051a4c11427d58b1967ae7b75a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 27 Feb 2017 14:30:07 -0800 Subject: mm: add new mmgrab() helper Apart from adding the helper function itself, the rest of the kernel is converted mechanically using: git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_count);/mmgrab\(\1\);/' git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_count);/mmgrab\(\&\1\);/' This is needed for a later patch that hooks into the helper, but might be a worthwhile cleanup on its own. (Michal Hocko provided most of the kerneldoc comment.) Link: http://lkml.kernel.org/r/20161218123229.22952-1-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum Acked-by: Michal Hocko Acked-by: Peter Zijlstra (Intel) Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 451e241f32c5..7cfa5546c840 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2904,6 +2904,28 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) */ extern struct mm_struct * mm_alloc(void); +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); static inline void mmdrop(struct mm_struct *mm) -- cgit v1.2.3 From 3fce371bfac2be0396ffc1e763600e6c6b1bb52a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 27 Feb 2017 14:30:10 -0800 Subject: mm: add new mmget() helper Apart from adding the helper function itself, the rest of the kernel is converted mechanically using: git grep -l 'atomic_inc.*mm_users' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_users);/mmget\(\1\);/' git grep -l 'atomic_inc.*mm_users' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_users);/mmget\(\&\1\);/' This is needed for a later patch that hooks into the helper, but might be a worthwhile cleanup on its own. (Michal Hocko provided most of the kerneldoc comment.) Link: http://lkml.kernel.org/r/20161218123229.22952-2-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum Acked-by: Michal Hocko Acked-by: Peter Zijlstra (Intel) Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7cfa5546c840..4a28deb5f210 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2948,6 +2948,27 @@ static inline void mmdrop_async(struct mm_struct *mm) } } +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + static inline bool mmget_not_zero(struct mm_struct *mm) { return atomic_inc_not_zero(&mm->mm_users); -- cgit v1.2.3 From b279ddc3382426f8e05068de3488a2993f68dc26 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 27 Feb 2017 14:30:16 -0800 Subject: mm: clarify mm_struct.mm_{users,count} documentation Clarify documentation relating to mm_users and mm_count, and switch to kernel-doc syntax. Link: http://lkml.kernel.org/r/20161218123229.22952-4-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum Acked-by: Michal Hocko Acked-by: Peter Zijlstra (Intel) Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 808751d7b737..4f6d440ad785 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -407,8 +407,27 @@ struct mm_struct { unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; - atomic_t mm_users; /* How many users with user space? */ - atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ + + /** + * @mm_users: The number of users including userspace. + * + * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops + * to 0 (i.e. when the task exits and there are no other temporary + * reference holders), we also release a reference on @mm_count + * (which may then free the &struct mm_struct if @mm_count also + * drops to 0). + */ + atomic_t mm_users; + + /** + * @mm_count: The number of references to &struct mm_struct + * (@mm_users count as 1). + * + * Use mmgrab()/mmdrop() to modify. When this drops to 0, the + * &struct mm_struct is freed. + */ + atomic_t mm_count; + atomic_long_t nr_ptes; /* PTE page table pages */ #if CONFIG_PGTABLE_LEVELS > 2 atomic_long_t nr_pmds; /* PMD page table pages */ -- cgit v1.2.3 From 2959a5f726f6510d6dd7c958f8877e08d0cf589c Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Mon, 27 Feb 2017 14:30:22 -0800 Subject: mm: add arch-independent testcases for RODATA This patch makes arch-independent testcases for RODATA. Both x86 and x86_64 already have testcases for RODATA, But they are arch-specific because using inline assembly directly. And cacheflush.h is not a suitable location for rodata-test related things. Since they were in cacheflush.h, If someone change the state of CONFIG_DEBUG_RODATA_TEST, It cause overhead of kernel build. To solve the above issues, write arch-independent testcases and move it to shared location. [jinb.park7@gmail.com: fix config dependency] Link: http://lkml.kernel.org/r/20170209131625.GA16954@pjb1027-Latitude-E5410 Link: http://lkml.kernel.org/r/20170129105436.GA9303@pjb1027-Latitude-E5410 Signed-off-by: Jinbum Park Acked-by: Kees Cook Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Arjan van de Ven Cc: Laura Abbott Cc: Russell King Cc: Valentin Rothberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rodata_test.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/rodata_test.h (limited to 'include/linux') diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h new file mode 100644 index 000000000000..ea05f6c51413 --- /dev/null +++ b/include/linux/rodata_test.h @@ -0,0 +1,23 @@ +/* + * rodata_test.h: functional test for mark_rodata_ro function + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#ifndef _RODATA_TEST_H +#define _RODATA_TEST_H + +#ifdef CONFIG_DEBUG_RODATA_TEST +extern const int rodata_test_data; +void rodata_test(void); +#else +static inline void rodata_test(void) {} +#endif + +#endif /* _RODATA_TEST_H */ -- cgit v1.2.3 From 4e4636cf981b5b629fbfb78aa9f232e015f7d521 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 27 Feb 2017 22:21:16 -0600 Subject: objtool: Enclose contents of unreachable() macro in a block Guenter Roeck reported a boot failure in mips64. It was bisected to the following commit: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") The unreachable() macro was formerly only composed of a single statement. The above commit added a second statement, but neglected to enclose the statements in a block. Suggested-by: Guenter Roeck Reported-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170228042116.glmwmwiohcix7o4a@treble Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index de471346b365..f457b520ead6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -215,7 +215,8 @@ * this in the preprocessor, but we can live with this because they're * unreleased. Really, we need to have autoconf for the kernel. */ -#define unreachable() annotate_unreachable(); __builtin_unreachable() +#define unreachable() \ + do { annotate_unreachable(); __builtin_unreachable(); } while (0) /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -- cgit v1.2.3 From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- include/linux/omap-gpmc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948..e821a3132a3e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif -- cgit v1.2.3 From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Tue, 21 Feb 2017 10:44:45 +0100 Subject: ARM: OMAP2+: Remove legacy gpmc-nand.c This code is no longer used and can be removed as we are using device tree. Removing this code also removes a dependency between drivers/mtd and arch/arm/mach-omap2 making furhter driver changes easier. Signed-off-by: Ladislav Michl [tony@atomide.com: removed from header too, updated comments] Signed-off-by: Tony Lindgren --- include/linux/omap-gpmc.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index e821a3132a3e..fd0de00c0d77 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -76,17 +76,6 @@ struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; -#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2) -extern int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t); -#else -static inline int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else -- cgit v1.2.3 From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Feb 2017 18:32:48 -0800 Subject: nfit, libnvdimm: fix interleave set cookie calculation The interleave-set cookie is a sum that sanity checks the composition of an interleave set has not changed from when the namespace was initially created. The checksum is calculated by sorting the DIMMs by their location in the interleave-set. The comparison for the sort must be 64-bit wide, not byte-by-byte as performed by memcmp() in the broken case. Fix the implementation to accept correct cookie values in addition to the Linux "memcmp" order cookies, but only allow correct cookies to be generated going forward. It does mean that namespaces created by third-party-tooling, or created by newer kernels with this fix, will not validate on older kernels. However, there are a couple mitigating conditions: 1/ platforms with namespace-label capable NVDIMMs are not widely available. 2/ interleave-sets with a single-dimm are by definition not affected (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case. The cookie stored in the namespace label will be fixed by any write the namespace label, the most straightforward way to achieve this is to write to the "alt_name" attribute of a namespace in sysfs. Cc: Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure") Reported-by: Nicholas Moulin Tested-by: Nicholas Moulin Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e56..77e7af32543f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { -- cgit v1.2.3 From 55149d06534ae2a7ba5f7a078353deb89b3fe891 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 00:05:04 -0600 Subject: objtool, compiler.h: Fix __unreachable section relocation size Linus reported the following commit broke module loading on his laptop: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") It showed errors like the following: module: overflow in relocation type 10 val ffffffffc02afc81 module: 'nvme' likely not compiled with -mcmodel=kernel The problem is that the __unreachable section addresses are stored using the '.long' asm directive, which isn't big enough for .text section kernel addresses. Use relative addresses instead: ".long %c0b - .\t\n" Suggested-by: Linus Torvalds Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301060504.oltm3iws6fmubnom@treble Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 76e28c229805..b6bb9019d87f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -201,7 +201,7 @@ #define annotate_unreachable() ({ \ asm("%c0:\t\n" \ ".pushsection __unreachable, \"a\"\t\n" \ - ".long %c0b\t\n" \ + ".long %c0b - .\t\n" \ ".popsection\t\n" : : "i" (__LINE__)); \ }) #else -- cgit v1.2.3 From e3736c3eb3a6f7c0966923b629c9f92b558aa9c7 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 20 Feb 2017 13:06:21 +0200 Subject: kvm: convert kvm.users_count from atomic_t to refcount_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d5150748..2c14ad9809da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; -- cgit v1.2.3 From b2d0fe35471d1a71471f99147ffb5986bd60e744 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Feb 2017 15:02:15 +0300 Subject: net/mlx4: && vs & typo Bitwise & was obviously intended here. Fixes: 745d8ae4622c ("net/mlx4: Spoofcheck and zero MAC can't coexist") Signed-off-by: Dan Carpenter Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index e965e5090d96..a858bcb6220b 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) int i; for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac && 0xFF; + addr[i - 1] = mac & 0xFF; mac >>= 8; } } -- cgit v1.2.3 From 39e6c8208d7b6fb9d2047850fb3327db567b564b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Feb 2017 10:34:50 -0800 Subject: net: solve a NAPI race While playing with mlx4 hardware timestamping of RX packets, I found that some packets were received by TCP stack with a ~200 ms delay... Since the timestamp was provided by the NIC, and my probe was added in tcp_v4_rcv() while in BH handler, I was confident it was not a sender issue, or a drop in the network. This would happen with a very low probability, but hurting RPC workloads. A NAPI driver normally arms the IRQ after the napi_complete_done(), after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab it. Problem is that if another point in the stack grabs NAPI_STATE_SCHED bit while IRQ are not disabled, we might have later an IRQ firing and finding this bit set, right before napi_complete_done() clears it. This can happen with busy polling users, or if gro_flush_timeout is used. But some other uses of napi_schedule() in drivers can cause this as well. thread 1 thread 2 (could be on same cpu, or not) // busy polling or napi_watchdog() napi_schedule(); ... napi->poll() device polling: read 2 packets from ring buffer Additional 3rd packet is available. device hard irq // does nothing because NAPI_STATE_SCHED bit is owned by thread 1 napi_schedule(); napi_complete_done(napi, 2); rearm_irq(); Note that rearm_irq() will not force the device to send an additional IRQ for the packet it already signaled (3rd packet in my example) This patch adds a new NAPI_STATE_MISSED bit, that napi_schedule_prep() can set if it could not grab NAPI_STATE_SCHED Then napi_complete_done() properly reschedules the napi to make sure we do not miss something. Since we manipulate multiple bits at once, use cmpxchg() like in sk_busy_loop() to provide proper transactions. In v2, I changed napi_watchdog() to use a relaxed variant of napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point. In v3, I added more details in the changelog and clears NAPI_STATE_MISSED in busy_poll_stop() In v4, I added the ideas given by Alexander Duyck in v3 review Signed-off-by: Eric Dumazet Cc: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f40f0ab3847a..97456b2539e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -330,6 +330,7 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ @@ -338,12 +339,13 @@ enum { }; enum { - NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), - NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } -/** - * napi_schedule_prep - check if NAPI can be scheduled - * @n: NAPI context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable to - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline bool napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} +bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll -- cgit v1.2.3 From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 12:04:44 -0600 Subject: objtool, modules: Discard objtool annotation sections for modules The '__unreachable' and '__func_stack_frame_non_standard' sections are only used at compile time. They're discarded for vmlinux but they should also be discarded for modules. Since this is a recurring pattern, prefix the section names with ".discard.". It's a nice convention and vmlinux.lds.h already discards such sections. Also remove the 'a' (allocatable) flag from the __unreachable section since it doesn't make sense for a discarded section. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Jessica Yu Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 2 +- include/linux/frame.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index b6bb9019d87f..0efef9cf014f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -200,7 +200,7 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ asm("%c0:\t\n" \ - ".pushsection __unreachable, \"a\"\t\n" \ + ".pushsection .discard.unreachable\t\n" \ ".long %c0b - .\t\n" \ ".popsection\t\n" : : "i" (__LINE__)); \ }) diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1ae..d772c61c31da 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From 0837e49ab3fa8d903a499984575d71efee8097ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Mar 2017 15:11:23 +0000 Subject: KEYS: Differentiate uses of rcu_dereference_key() and user_key_payload() rcu_dereference_key() and user_key_payload() are currently being used in two different, incompatible ways: (1) As a wrapper to rcu_dereference() - when only the RCU read lock used to protect the key. (2) As a wrapper to rcu_dereference_protected() - when the key semaphor is used to protect the key and the may be being modified. Fix this by splitting both of the key wrappers to produce: (1) RCU accessors for keys when caller has the key semaphore locked: dereference_key_locked() user_key_payload_locked() (2) RCU accessors for keys when caller holds the RCU read lock: dereference_key_rcu() user_key_payload_rcu() This should fix following warning in the NFS idmapper =============================== [ INFO: suspicious RCU usage. ] 4.10.0 #1 Tainted: G W ------------------------------- ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by mount.nfs/5987: #0: (rcu_read_lock){......}, at: [] nfs_idmap_get_key+0x15c/0x420 [nfsv4] stack backtrace: CPU: 1 PID: 5987 Comm: mount.nfs Tainted: G W 4.10.0 #1 Call Trace: dump_stack+0xe8/0x154 (unreliable) lockdep_rcu_suspicious+0x140/0x190 nfs_idmap_get_key+0x380/0x420 [nfsv4] nfs_map_name_to_uid+0x2a0/0x3b0 [nfsv4] decode_getfattr_attrs+0xfac/0x16b0 [nfsv4] decode_getfattr_generic.constprop.106+0xbc/0x150 [nfsv4] nfs4_xdr_dec_lookup_root+0xac/0xb0 [nfsv4] rpcauth_unwrap_resp+0xe8/0x140 [sunrpc] call_decode+0x29c/0x910 [sunrpc] __rpc_execute+0x140/0x8f0 [sunrpc] rpc_run_task+0x170/0x200 [sunrpc] nfs4_call_sync_sequence+0x68/0xa0 [nfsv4] _nfs4_lookup_root.isra.44+0xd0/0xf0 [nfsv4] nfs4_lookup_root+0xe0/0x350 [nfsv4] nfs4_lookup_root_sec+0x70/0xa0 [nfsv4] nfs4_find_root_sec+0xc4/0x100 [nfsv4] nfs4_proc_get_rootfh+0x5c/0xf0 [nfsv4] nfs4_get_rootfh+0x6c/0x190 [nfsv4] nfs4_server_common_setup+0xc4/0x260 [nfsv4] nfs4_create_server+0x278/0x3c0 [nfsv4] nfs4_remote_mount+0x50/0xb0 [nfsv4] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 nfs_do_root_mount+0xb0/0x140 [nfsv4] nfs4_try_mount+0x60/0x100 [nfsv4] nfs_fs_mount+0x5ec/0xda0 [nfs] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 do_mount+0x254/0xf70 SyS_mount+0x94/0x100 system_call+0x38/0xe0 Reported-by: Jan Stancek Signed-off-by: David Howells Tested-by: Jan Stancek Signed-off-by: James Morris --- include/linux/key.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f37..e45212f2777e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key) !test_bit(KEY_FLAG_NEGATIVE, &key->flags); } -#define rcu_dereference_key(KEY) \ +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) -- cgit v1.2.3 From eb1e011a14748a1d9df9a7d7df9a5711721a1bdb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Feb 2017 09:49:26 +0100 Subject: average: change to declare precision, not factor Declaring the factor is counter-intuitive, and people are prone to using small(-ish) values even when that makes no sense. Change the DECLARE_EWMA() macro to take the fractional precision, in bits, rather than a factor, and update all users. While at it, add some more documentation. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- include/linux/average.h | 61 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/average.h b/include/linux/average.h index d04aa58280de..7ddaf340d2ac 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,45 +1,66 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H -/* Exponentially weighted moving average (EWMA) */ +/* + * Exponentially weighted moving average (EWMA) + * + * This implements a fixed-precision EWMA algorithm, with both the + * precision and fall-off coefficient determined at compile-time + * and built into the generated helper funtions. + * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the precision, expresses how many bits are + * used for the fractional part of the fixed-precision values. + * + * The third argument, the weight reciprocal, determines how the + * new values will be weighed vs. the old state, new values will + * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note + * that this parameter must be a power of two for efficiency. + */ -#define DECLARE_EWMA(name, _factor, _weight) \ +#define DECLARE_EWMA(name, _precision, _weight_rcp) \ struct ewma_##name { \ unsigned long internal; \ }; \ static inline void ewma_##name##_init(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + /* \ + * Even if you want to feed it just 0/1 you should have \ + * some bits for the non-fractional part... \ + */ \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ e->internal = 0; \ } \ static inline unsigned long \ ewma_##name##_read(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ - return e->internal >> ilog2(_factor); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ + return e->internal >> (_precision); \ } \ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ unsigned long internal = ACCESS_ONCE(e->internal); \ - unsigned long weight = ilog2(_weight); \ - unsigned long factor = ilog2(_factor); \ + unsigned long weight_rcp = ilog2(_weight_rcp); \ + unsigned long precision = _precision; \ \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ ACCESS_ONCE(e->internal) = internal ? \ - (((internal << weight) - internal) + \ - (val << factor)) >> weight : \ - (val << factor); \ + (((internal << weight_rcp) - internal) + \ + (val << precision)) >> weight_rcp : \ + (val << precision); \ } #endif /* _LINUX_AVERAGE_H */ -- cgit v1.2.3 From 9ccd27cc2e8bfbec502d7c64a353dc4489536b81 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:11:09 +0100 Subject: sched/headers: Make all include/linux/sched/*.h headers build standalone Make each header self-sufficient, so that it can be built successfully both in an allnoconfig and allyesconfig kernel. Also standardize the naming of their header guards. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 8 +++++--- include/linux/sched/prio.h | 6 +++--- include/linux/sched/rt.h | 10 ++++++---- include/linux/sched/sysctl.h | 10 +++++++--- 4 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9089a2ae913d..975be862e083 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,7 @@ -#ifndef _SCHED_DEADLINE_H -#define _SCHED_DEADLINE_H +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H + +#include /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#endif /* _SCHED_DEADLINE_H */ +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index d9cf5a5762d9..2cc450f6ec54 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,5 +1,5 @@ -#ifndef _SCHED_PRIO_H -#define _SCHED_PRIO_H +#ifndef _LINUX_SCHED_PRIO_H +#define _LINUX_SCHED_PRIO_H #define MAX_NICE 19 #define MIN_NICE -20 @@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio) return (MAX_NICE - prio + 1); } -#endif /* _SCHED_PRIO_H */ +#endif /* _LINUX_SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1..3bd668414f61 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,7 +1,9 @@ -#ifndef _SCHED_RT_H -#define _SCHED_RT_H +#ifndef _LINUX_SCHED_RT_H +#define _LINUX_SCHED_RT_H -#include +#include + +struct task_struct; static inline int rt_prio(int prio) { @@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void); */ #define RR_TIMESLICE (100 * HZ / 1000) -#endif /* _SCHED_RT_H */ +#endif /* _LINUX_SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 49308e142aae..0f5ecd4d298e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -1,5 +1,9 @@ -#ifndef _SCHED_SYSCTL_H -#define _SCHED_SYSCTL_H +#ifndef _LINUX_SCHED_SYSCTL_H +#define _LINUX_SCHED_SYSCTL_H + +#include + +struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK extern int sysctl_hung_task_check_count; @@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif /* _SCHED_SYSCTL_H */ +#endif /* _LINUX_SCHED_SYSCTL_H */ -- cgit v1.2.3 From c930b2c0de32f45ce8f67affe936ce7a05b07b00 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:22:54 +0100 Subject: sched/core: Convert ___assert_task_state() link time assert to BUILD_BUG_ON() The length of TASK_STATE_TO_CHAR_STR was still checked using the old link-time manual error method - convert it to BUILD_BUG_ON(). This has a couple of advantages: - it's more obvious what's going on - it reduces the size and complexity of - BUILD_BUG_ON() will fail during compilation, with a clearer error message than the link time assert. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210..c204613396cd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,9 +223,6 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - /* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -- cgit v1.2.3 From 59ddbcb2f45b958cf1f11f122b666cbcf50cd57b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:37:48 +0100 Subject: sched/core: Move the get_preempt_disable_ip() inline to sched/core.c It's defined in , but nothing outside the scheduler uses it - so move it to the sched/core.c usage site. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c204613396cd..df42cac04243 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3419,15 +3419,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, -- cgit v1.2.3 From 0c98d344fe5c27f6e4bce42ac503e9e9a51c7d1d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:38:10 +0100 Subject: sched/core: Remove the tsk_cpus_allowed() wrapper So the original intention of tsk_cpus_allowed() was to 'future-proof' the field - but it's pretty ineffectual at that, because half of the code uses ->cpus_allowed directly ... Also, the wrapper makes the code longer than the original expression! So just get rid of it. This also shrinks a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index df42cac04243..6d1cc20cc477 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,9 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - static inline int tsk_nr_cpus_allowed(struct task_struct *p) { return p->nr_cpus_allowed; -- cgit v1.2.3 From 4b53a3412d6663214ce9c754eff9373a9cff9dee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:41:03 +0100 Subject: sched/core: Remove the tsk_nr_cpus_allowed() wrapper tsk_nr_cpus_allowed() too is a pretty pointless wrapper that is not used consistently and which makes the code both harder to read and longer as well. So remove it - this also shrinks a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d1cc20cc477..e732881517f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,11 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 -- cgit v1.2.3 From f9411ebe3d85cbbea06298241e6053d031d281fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:50:49 +0100 Subject: rcu: Separate the RCU synchronization types and APIs into So rcupdate.h is a pretty complex header, in particular it includes which includes - creating a dependency that includes in , which prevents the isolation of from the derived header. Solve part of the problem by decoupling rcupdate.h from completions: this can be done by separating out the rcu_synchronize types and APIs, and updating their usage sites. Since this is a mostly RCU-internal types this will not just simplify 's dependencies, but will make all the hundreds of .c files that include rcupdate.h but not completions or wait.h build faster. ( For rcutiny this means that two dependent APIs have to be uninlined, but that shouldn't be much of a problem as they are rare variants. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/dcache.h | 1 + include/linux/rcupdate.h | 40 ---------------------------------- include/linux/rcupdate_wait.h | 50 +++++++++++++++++++++++++++++++++++++++++++ include/linux/rcutiny.h | 11 ++-------- 4 files changed, 53 insertions(+), 49 deletions(-) create mode 100644 include/linux/rcupdate_wait.h (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 591b6c16f9c1..d2e38dc6172c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include #include #include +#include struct path; struct vfsmount; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ade6a52d9d4..de88b33c0974 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); -/* - * Structure allowing asynchronous waiting on RCU. - */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; -void wakeme_after_rcu(struct rcu_head *head); - -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); - -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ -} while (0) - -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) - -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for the flavors to wait on. - * - * This macro waits concurrently for multiple flavors of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait - * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU - * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called - * is automatically a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h new file mode 100644 index 000000000000..e774b4f5f220 --- /dev/null +++ b/include/linux/rcupdate_wait.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H +#define _LINUX_SCHED_RCUPDATE_WAIT_H + +/* + * RCU synchronization types and methods: + */ + +#include +#include + +/* + * Structure allowing asynchronous waiting on RCU. + */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + +#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4f9b2fa2173d..b452953e21c8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate) might_sleep(); } -static inline void rcu_barrier_bh(void) -{ - wait_rcu_gp(call_rcu_bh); -} - -static inline void rcu_barrier_sched(void) -{ - wait_rcu_gp(call_rcu_sched); -} +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); static inline void synchronize_rcu_expedited(void) { -- cgit v1.2.3 From 780de9dd2720debc14c501dab4dc80d1f75ad50e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:50:56 +0100 Subject: sched/headers, cgroups: Remove the threadgroup_change_*() wrappery threadgroup_change_begin()/end() is a pointless wrapper around cgroup_threadgroup_change_begin()/end(), minus a might_sleep() in the !CONFIG_CGROUPS=y case. Remove the wrappery, move the might_sleep() (the down_read() already has a might_sleep() check). This debloats a bit and simplifies this API. Update all call sites. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cgroup-defs.h | 13 ++++++++----- include/linux/sched.h | 28 ---------------------------- 2 files changed, 8 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3c02404cfce9..6a3f850cabab 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. + * Allows cgroup operations to synchronize against threadgroup changes + * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { @@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). + * Counterpart of cgroup_threadcgroup_change_begin(). */ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { @@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #define CGROUP_SUBSYS_COUNT 0 -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + might_sleep(); +} + static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e732881517f2..3f61baac928b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3162,34 +3162,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -/** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes - * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. - */ -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - might_sleep(); - cgroup_threadgroup_change_begin(tsk); -} - -/** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes - * - * See threadgroup_change_begin(). - */ -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - cgroup_threadgroup_change_end(tsk); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) -- cgit v1.2.3 From 314ff7851fc8ea66cbf48eaa93d8ebfb5ca084a9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:31 +0100 Subject: mm/vmacache, sched/headers: Introduce 'struct vmacache' and move it from to The header includes various vmacache related defines, which are arguably misplaced. Move them to mm_types.h and minimize the sched.h impact by putting all task vmacache state into a new 'struct vmacache' structure. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/sched.h | 11 ++++------- include/linux/vmacache.h | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f6d440ad785..137797cd7b50 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -360,6 +360,18 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + struct core_thread { struct task_struct *task; struct core_thread *next; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3f61baac928b..e87c97e1a947 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,10 +134,6 @@ struct blk_plug; struct filename; struct nameidata; -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - /* * These are the constant used to fake the fixed-point load-average * counting. Some notes: @@ -1550,9 +1546,10 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; + + /* Per-thread vma caching: */ + struct vmacache vmacache; + #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index c3fa0fd43949..1081db987391 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -12,7 +12,7 @@ static inline void vmacache_flush(struct task_struct *tsk) { - memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_flush_all(struct mm_struct *mm); -- cgit v1.2.3 From af8601ad420f6afa6445c927ad9f36d9700d96d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: kasan, sched/headers: Uninline kasan_enable/disable_current() is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. But kasan.h has almost no need for sched.h: its only use of scheduler functionality is in two inline functions which are not used very frequently - so uninline kasan_enable_current() and kasan_disable_current(). Also add a dependency to a .c file that depended on kasan.h including it. This paves the way to remove the include from kasan.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..7793036eac80 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); -- cgit v1.2.3 From 105ab3d8ce7269887d24d224054677125e18037c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 1 + include/linux/sched/topology.h | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 include/linux/sched/topology.h (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..c608c39cb161 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 000000000000..4f1159f76f92 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ -- cgit v1.2.3 From 4c822698cba8bdd93724117eded12bf34eb80252 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/idle.h | 6 ++++++ include/linux/sched/topology.h | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 include/linux/sched/idle.h (limited to 'include/linux') diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 000000000000..a3cf79b86986 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4f1159f76f92..184b56b8aa64 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -3,4 +3,6 @@ #include +#include + #endif /* _LINUX_SCHED_TOPOLOGY_H */ -- cgit v1.2.3 From 84f001e15737f8214b0f5f0f7dfec0fb1027938f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/wake_q.h (limited to 'include/linux') diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h new file mode 100644 index 000000000000..6383b35e4eba --- /dev/null +++ b/include/linux/sched/wake_q.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_WAKE_Q_H +#define _LINUX_SCHED_WAKE_Q_H + +#include + +#endif /* _LINUX_SCHED_WAKE_Q_H */ -- cgit v1.2.3 From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/blkdev.h | 1 + include/linux/sched/clock.h | 6 ++++++ include/linux/skbuff.h | 1 + 3 files changed, 8 insertions(+) create mode 100644 include/linux/sched/clock.h (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..796016e63c1d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..7cb7d79b2cf9 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 4f17722c7256af8e17c2c4f29f170247264bdf48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 08:45:17 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/loadavg.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/loadavg.h (limited to 'include/linux') diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h new file mode 100644 index 000000000000..3ae74be327e8 --- /dev/null +++ b/include/linux/sched/loadavg.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_LOADAVG_H +#define _LINUX_SCHED_LOADAVG_H + +#include + +#endif /* _LINUX_SCHED_LOADAVG_H */ -- cgit v1.2.3 From 4eb5aaa3af8a54e5e9bac90e2b42bbab1f1ee5a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/autogroup.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/autogroup.h (limited to 'include/linux') diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h new file mode 100644 index 000000000000..d745f22e57dc --- /dev/null +++ b/include/linux/sched/autogroup.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_AUTOGROUP_H +#define _LINUX_SCHED_AUTOGROUP_H + +#include + +#endif /* _LINUX_SCHED_AUTOGROUP_H */ -- cgit v1.2.3 From 6e84f31522f931027bf695752087ece278c10d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. The APIs that are going to be moved first are: mm_alloc() __mmdrop() mmdrop() mmdrop_async_fn() mmdrop_async() mmget_not_zero() mmput() mmput_async() get_task_mm() mm_access() mm_release() Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/mm.h (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h new file mode 100644 index 000000000000..a7adba1cd0a9 --- /dev/null +++ b/include/linux/sched/mm.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_MM_H +#define _LINUX_SCHED_MM_H + +#include + +#endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From f7ccbae45c5e2c1077654b0e857e7efb1aa31c92 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/khugepaged.h | 3 ++- include/linux/ksm.h | 1 + include/linux/sched/coredump.h | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/coredump.h (limited to 'include/linux') diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1e032a1ddb3e..5d9a400af509 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -1,7 +1,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include /* MMF_VM_HUGEPAGE */ +#include /* MMF_VM_HUGEPAGE */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 481c8c4627ca..e1cfda4bee58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -12,6 +12,7 @@ #include #include #include +#include struct stable_node; struct mem_cgroup; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h new file mode 100644 index 000000000000..ab81e564e076 --- /dev/null +++ b/include/linux/sched/coredump.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_COREDUMP_H +#define _LINUX_SCHED_COREDUMP_H + +#include + +#endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/oom.h | 2 +- include/linux/ptrace.h | 1 + include/linux/sched/signal.h | 6 ++++++ include/linux/signalfd.h | 2 +- include/linux/taskstats_kern.h | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 include/linux/sched/signal.h (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include +#include #include #include #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* For send_sig(), same_thread_group(), etc. */ #include /* for IS_ERR_VALUE */ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..da69cd05cd68 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include - +#include #ifdef CONFIG_SIGNALFD diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include -#include +#include #include #ifdef CONFIG_TASKSTATS -- cgit v1.2.3 From 8703e8a465b1e9cadc3680b4b1248f5987e54518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cred.h | 2 +- include/linux/sched/user.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/user.h (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3ac..045d33e48069 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,8 +18,8 @@ #include #include #include +#include -struct user_struct; struct cred; struct inode; diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h new file mode 100644 index 000000000000..83238e5ddadd --- /dev/null +++ b/include/linux/sched/user.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_USER_H +#define _LINUX_SCHED_USER_H + +#include + +#endif /* _LINUX_SCHED_USER_H */ -- cgit v1.2.3 From 55687da166bf51129ed6b110d7711f4c7560abe2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/cpufreq.h (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h new file mode 100644 index 000000000000..07ed9664fa20 --- /dev/null +++ b/include/linux/sched/cpufreq.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CPUFREQ_H +#define _LINUX_SCHED_CPUFREQ_H + +#include + +#endif /* _LINUX_SCHED_CPUFREQ_H */ -- cgit v1.2.3 From 6a3827d7509cbf96b7e961f8957c1f01d1bcf894 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/numa_balancing.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/numa_balancing.h (limited to 'include/linux') diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h new file mode 100644 index 000000000000..999182279f78 --- /dev/null +++ b/include/linux/sched/numa_balancing.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NUMA_BALANCING_H +#define _LINUX_SCHED_NUMA_BALANCING_H + +#include + +#endif /* _LINUX_SCHED_NUMA_BALANCING_H */ -- cgit v1.2.3 From 1e4bae6468375f81a3d4bdf436deaa9ea294e12b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:32 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/jobctl.h | 4 ++++ include/linux/sched/signal.h | 1 + 2 files changed, 5 insertions(+) create mode 100644 include/linux/sched/jobctl.h (limited to 'include/linux') diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h new file mode 100644 index 000000000000..228e4644937b --- /dev/null +++ b/include/linux/sched/jobctl.h @@ -0,0 +1,4 @@ +#ifndef _LINUX_SCHED_JOBCTL_H +#define _LINUX_SCHED_JOBCTL_H + +#endif /* _LINUX_SCHED_JOBCTL_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index da69cd05cd68..ce93c4a02b79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_SIGNAL_H #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ -- cgit v1.2.3 From 5b825c3af1d8a0af4deb4a5eb349d0d0050c62e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: sched/headers: Prepare to remove inclusion from Add #include dependencies to all .c files rely on sched.h doing that for them. Note that even if the count where we need to add extra headers seems high, it's still a net win, because is included in over 2,200 files ... Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cred.h | 1 + include/linux/sched/signal.h | 1 + include/linux/wait.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 045d33e48069..b03e7d049a64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct cred; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ce93c4a02b79..0f4e9f4a43fd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e9086..aacb1282d19a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include #include #include + #include #include -- cgit v1.2.3 From 037741a6d4ab2e4b0580dae97aca963d8a8dc68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: sched/headers: Prepare for the removal of from Fix up missing #includes in other places that rely on sched.h doing that for them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bed8fbb45f31..6b183521c616 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,6 +30,7 @@ #include /* for struct device */ #include /* for completion */ #include +#include #include /* for Host Notify IRQ */ #include /* for struct device_node */ #include /* for swab16 */ -- cgit v1.2.3 From cc5efc2323a89dcf1a02c17b9b9f255c5a6e0492 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: sched/headers: Prepare for the removal of various unrelated headers from We are going to remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include Fix up a single .h file that got hold of via one of these headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 363e0e8082a9..08264641b502 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #define UID_GID_MAP_MAX_EXTENTS 5 -- cgit v1.2.3 From b12fb7f46af7d548503d75be59f493f958c6d1b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:33 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the .c file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/xacct.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/xacct.h (limited to 'include/linux') diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h new file mode 100644 index 000000000000..890f7ce5cd27 --- /dev/null +++ b/include/linux/sched/xacct.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_XACCT_H +#define _LINUX_SCHED_XACCT_H + +#include + +#endif /* _LINUX_SCHED_XACCT_H */ -- cgit v1.2.3 From 174cd4b1e5fbd0d74c68cf3a74f5bd4923485512 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: sched/headers: Prepare to move signal wakeup & sigpending methods from into Fix up affected files that include this signal functionality via sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sunrpc/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..11a7536c0fd2 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include +#include #include #include #include -- cgit v1.2.3 From fd7712337ff09a248df424c5843c149586a3f017 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: sched/headers: Prepare to remove the include from is still needed - also update other headers and .c files that depend on sched.h including gfp.h (and its sub-headers) for them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/sched/mm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e87c97e1a947..8ae7b3d85658 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -56,6 +56,7 @@ struct sched_param { #include #include #include +#include #include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index a7adba1cd0a9..1cf7941bb946 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_MM_H #include +#include #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 03441a3482a31462c93509939a388877e3cd9261 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/stat.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/stat.h (limited to 'include/linux') diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h new file mode 100644 index 000000000000..30fe012aecb0 --- /dev/null +++ b/include/linux/sched/stat.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_STAT_H +#define _LINUX_SCHED_STAT_H + +#include + +#endif /* _LINUX_SCHED_STAT_H */ -- cgit v1.2.3 From 370c91355c76cbcaad8ff79b4bb15a7f2ea59433 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/nohz.h (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h new file mode 100644 index 000000000000..fe6caf0dab10 --- /dev/null +++ b/include/linux/sched/nohz.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NOHZ_H +#define _LINUX_SCHED_NOHZ_H + +#include + +#endif /* _LINUX_SCHED_NOHZ_H */ -- cgit v1.2.3 From b17b01533b719e9949e437abf66436a875739b40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/debug.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/debug.h (limited to 'include/linux') diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 000000000000..55bc0cd35fd5 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +#include + +#endif /* _LINUX_SCHED_DEBUG_H */ -- cgit v1.2.3 From ef8bd77f332bb0a4e467d7171bbfc6c57aa08a88 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/hotplug.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/hotplug.h (limited to 'include/linux') diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 000000000000..34670ed24894 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +#include + +#endif /* _LINUX_SCHED_HOTPLUG_H */ -- cgit v1.2.3 From 299300258d1bc4e997b7db340a2e06636757fe2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/sched/task.h (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 000000000000..0023c91ff821 --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +#include + +#endif /* _LINUX_SCHED_TASK_H */ -- cgit v1.2.3 From 68db0cf10678630d286f4bbbbdfa102951a35faa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:37 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/elfcore.h | 2 ++ include/linux/perf_regs.h | 2 ++ include/linux/sched/task_stack.h | 6 ++++++ 3 files changed, 10 insertions(+) create mode 100644 include/linux/sched/task_stack.h (limited to 'include/linux') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..c8240a12c42d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include #include +#include + #include #include diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d732..9b7dd59fe28d 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 000000000000..933cd49824c2 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +#include + +#endif /* _LINUX_SCHED_TASK_STACK_H */ -- cgit v1.2.3 From dfc3401a33086a3fd465468e171ea0e82430569b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: sched/headers: Prepare to move the 'root_task_group' declaration to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a85d61f7614..452c9799318c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From e6d930b4e0115eb0732eec0efb11c3b09a8000fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:43:50 +0100 Subject: signals: Prepare to split out from Introduce dummy header and add dependencies to places that will depend on it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/signal.h | 3 +-- include/linux/signal_types.h | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 include/linux/signal_types.h (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 5308304993be..d1c2b05a7a55 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H -#include #include -#include +#include struct task_struct; diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h new file mode 100644 index 000000000000..28799c88f490 --- /dev/null +++ b/include/linux/signal_types.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SIGNAL_TYPES_H +#define _LINUX_SIGNAL_TYPES_H + +#include +#include + +#endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From f361bf4a66c9bfabace46f6ff5d97005c9b524fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: sched/headers: Prepare for the reduction of 's signal API dependency Instead of including the full , we are going to include the types-only header in , to further decouple the scheduler header from the signal headers. This means that various files which relied on the full need to be updated to gain an explicit dependency on it. Update the code that relies on sched.h's inclusion of the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/sched/signal.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ae7b3d85658..ea05116bc3c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0f4e9f4a43fd..7e10a7824523 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include -- cgit v1.2.3 From 2e58f173ab89b29a1373088b8727133dbf7322b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: mm/headers, sched/headers: Prepare to split out of We are going to separate all the MM types that are embedded directly in 'struct task_struct' into the new header. Create a new that only contains some includes from mm_types.h itself. This should be trivially correct and easy to bisect to. (This patch does not materially move the types yet.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 6 +++--- include/linux/mm_types_task.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 include/linux/mm_types_task.h (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 137797cd7b50..6daaf0a51968 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1,9 +1,9 @@ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H +#include + #include -#include -#include #include #include #include @@ -13,7 +13,7 @@ #include #include #include -#include + #include #ifndef AT_VECTOR_SIZE_ARCH diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h new file mode 100644 index 000000000000..2419d302f03c --- /dev/null +++ b/include/linux/mm_types_task.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_MM_TYPES_TASK_H +#define _LINUX_MM_TYPES_TASK_H + +#include +#include +#include + +#include + +#endif /* _LINUX_MM_TYPES_TASK_H */ -- cgit v1.2.3 From 589ee62844e042b0b7d19ef57fb4cff77f3ca294 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: sched/headers: Prepare to remove the dependency from Update code that relied on sched.h including various MM types for them. This will allow us to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + include/linux/sched/mm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 452c9799318c..f6841c19c913 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1cf7941bb946..d32e3932b2e3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_MM_H #include +#include #include #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 9164bb4a18dfa592cd0aca455ea57abf89ca4526 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: sched/headers: Prepare to move 'init_task' and 'init_thread_union' from to Update all usage sites first. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/signal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7e10a7824523..320eca0446cd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -5,5 +5,6 @@ #include #include #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ -- cgit v1.2.3 From b2d091031075ac9a1598e3cc3a29c28f02e64c0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: sched/headers: Prepare to use instead of in We don't actually need the full rculist.h header in sched.h anymore, we will be able to include the smaller rcupdate.h header instead. But first update code that relied on the implicit header inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/dmar.h | 2 +- include/linux/pid.h | 2 +- include/linux/rhashtable.h | 2 +- include/linux/sched/signal.h | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a..e8ffba1052d3 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include struct acpi_dmar_header; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e5..4d179316e431 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include +#include enum pid_type { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..092292b6675e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 320eca0446cd..c6958a53fef3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include -- cgit v1.2.3 From f719ff9bcee2a422647790f12d53d3755f47c727 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: sched/headers: Prepare to move the task_lock()/unlock() APIs to But first update the code that uses these facilities with the new header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpuset.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index c608c39cb161..611fce58d670 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 32ef5517c298042ed58408545f475df43afe1f24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: sched/headers: Prepare to move cputime functionality from into Introduce a trivial, mostly empty header to prepare for the moving of cputime functionality out of sched.h. Update all code that relies on these facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/cputime.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 include/linux/sched/cputime.h (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 000000000000..6ed4fe43de28 --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include +#include + +#endif /* _LINUX_SCHED_CPUTIME_H */ -- cgit v1.2.3 From 1777e4635507265ba53d8dc4cd248e7d7c306fa0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: sched/headers: Prepare to move _init() prototypes from to But first introduce a trivial header and update usage sites. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ include/linux/sched/init.h | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 include/linux/sched/init.h (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 21f9c74496e7..f92081234afd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,6 +30,8 @@ struct cpu { extern void boot_cpu_init(void); extern void boot_cpu_state_init(void); +extern void cpu_init(void); +extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h new file mode 100644 index 000000000000..15e46313e55e --- /dev/null +++ b/include/linux/sched/init.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_INIT_H +#define _LINUX_SCHED_INIT_H + +#include + +#endif /* _LINUX_SCHED_INIT_H */ -- cgit v1.2.3 From 50d34394cee68dd12c5e01fff073d1167700bfce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: sched/headers: Prepare to remove the include from Update files that depend on the magic.h inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/task_stack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 933cd49824c2..d2d578e85f7d 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_TASK_STACK_H #include +#include #endif /* _LINUX_SCHED_TASK_STACK_H */ -- cgit v1.2.3 From b69339ba109549beeaf45c45c52ac7025bfcd954 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:15:03 +0100 Subject: sched/headers: Prepare to remove spurious inclusion dependencies In the following patches we are going to remove various headers from sched.h and other headers that sched.h includes. To make those patches build cleanly prepare the scene by adding dependencies to various files that learned to rely on those to-be-removed dependencies. These changes all make sense standalone: they add a header for a data type that a particular .c or .h file is using. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea05116bc3c2..b5c6d602dfe9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ struct sched_param { #include #include #include +#include #include #include #include -- cgit v1.2.3 From a60b9eda67beac2318fa159545ba7d022f780736 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Move scheduler topology interfaces to The vast majority of sched.h users does not require the topology types and interfaces, so split them out into . This reduces the size of linux/sched.h by ~6%. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 212 ---------------------------------------- include/linux/sched/topology.h | 213 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 212 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b5c6d602dfe9..55480782ce7f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -960,12 +960,6 @@ enum cpu_idle_type { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - /* * Wake-queues are lists of tasks with a pending wakeup, whose * callers have already marked the task as woken internally, @@ -1016,214 +1010,8 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif - -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. */ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; - -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - -/* Allocate an array of sched domains, for partition_sched_domains(). */ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); - -bool cpus_share_cache(int this_cpu, int that_cpu); - -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); - -#define SDTL_OVERLAP 0x01 - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; - -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif -}; - -extern void set_sched_topology(struct sched_domain_topology_level *tl); extern void wake_up_if_idle(int cpu); -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -#endif /* !CONFIG_SMP */ - - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 184b56b8aa64..b3550b36e65d 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -5,4 +5,217 @@ #include +/* + * sched-domains (multiprocessor balancing) declarations: + */ +#ifdef CONFIG_SMP + +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ + +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + +#ifdef CONFIG_SCHED_SMT +static inline int cpu_smt_flags(void) +{ + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + +extern int arch_asym_cpu_priority(int cpu); + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + +extern int sched_domain_level_max; + +struct sched_group; + +struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; +}; + +struct sched_domain { + /* These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; + unsigned int forkexec_idx; + unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ + int flags; /* See SD_* */ + int level; + + /* Runtime fields. */ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + + u64 avg_scan_cost; /* select_idle_sibling */ + +#ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; +#endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; + struct sched_domain_shared *shared; + + unsigned int span_weight; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long span[0]; +}; + +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new); + +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + +bool cpus_share_cache(int this_cpu, int that_cpu); + +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; + struct sched_group **__percpu sg; + struct sched_group_capacity **__percpu sgc; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{ +} + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + +#endif /* !CONFIG_SMP */ + #endif /* _LINUX_SCHED_TOPOLOGY_H */ -- cgit v1.2.3 From b768917d2c08edc4b5616c86a569e524d190434b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:51:00 +0100 Subject: sched/headers: Move the 'cpu_idle_type' enum from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/idle.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 55480782ce7f..8a8252e9e0c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -943,13 +943,6 @@ static inline int sched_info_on(void) void force_schedstat_enabled(void); #endif -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES -}; - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index a3cf79b86986..6aabc9968cba 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -3,4 +3,11 @@ #include +enum cpu_idle_type { + CPU_IDLE, + CPU_NOT_IDLE, + CPU_NEWLY_IDLE, + CPU_MAX_IDLE_TYPES +}; + #endif /* _LINUX_SCHED_IDLE_H */ -- cgit v1.2.3 From 4437722b0486c36dc90a1adf584fca4cea6cf1de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:58:52 +0100 Subject: sched/headers: Move the wake_up_if_idle() prototype to No need to clutter with this rarely used prototype. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/sched/idle.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a8252e9e0c7..c30705c3e553 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1003,8 +1003,6 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -extern void wake_up_if_idle(int cpu); - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 6aabc9968cba..72b6b6ab6354 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -10,4 +10,6 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +extern void wake_up_if_idle(int cpu); + #endif /* _LINUX_SCHED_IDLE_H */ -- cgit v1.2.3 From 5dbe91de599423d243738a205367506444ebb4a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:47:28 +0100 Subject: sched/headers: Move idle polling methods to Further reduce the size of by moving these APIs: tsk_is_polling() __current_set_polling() current_set_polling_and_test() __current_clr_polling() current_clr_polling_and_test() current_clr_polling() Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 76 ---------------------------------------------- include/linux/sched/idle.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c30705c3e553..4f512e337584 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3176,82 +3176,6 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 72b6b6ab6354..66ee32f87f0b 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -12,4 +12,80 @@ enum cpu_idle_type { extern void wake_up_if_idle(int cpu); +/* + * Idle thread specific functions to determine the need_resched + * polling state. + */ +#ifdef TIF_POLLING_NRFLAG +static inline int tsk_is_polling(struct task_struct *p) +{ + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +} + +static inline void __current_set_polling(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) +{ + clear_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +#else +static inline int tsk_is_polling(struct task_struct *p) { return 0; } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +#endif + +static inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + #endif /* _LINUX_SCHED_IDLE_H */ -- cgit v1.2.3 From eb61baf69871b9836783a81bc451189edb0d9de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 17:09:06 +0100 Subject: sched/headers: Move the wake-queue types and interfaces from sched.h into Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 54 ++++---------------------------------------- include/linux/sched/wake_q.h | 47 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f512e337584..5cda7cf09505 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -953,56 +953,6 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. - * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; - -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; - -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } - -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} - -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); - struct io_context; /* See blkdev.h */ @@ -1234,6 +1184,10 @@ enum perf_event_task_context { perf_nr_task_contexts, }; +struct wake_q_node { + struct wake_q_node *next; +}; + /* Track pages that require TLB flushes */ struct tlbflush_unmap_batch { /* diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 6383b35e4eba..d03d8a9047dc 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -1,6 +1,53 @@ #ifndef _LINUX_SCHED_WAKE_Q_H #define _LINUX_SCHED_WAKE_Q_H +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The DEFINE_WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. + */ + #include +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define DEFINE_WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + #endif /* _LINUX_SCHED_WAKE_Q_H */ -- cgit v1.2.3 From 5689810360c2e88ce1619e8bcfa859852f9a1d1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Feb 2017 12:07:18 +0100 Subject: sched/headers: Move scheduler clock interfaces to Move the sched_clock interfaces into a separate header file, to reduce the size of sched.h. Include in all files that made use of one of the Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 100 -------------------------------------------- include/linux/sched/clock.h | 98 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5cda7cf09505..b42d2f9d673a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2190,103 +2190,6 @@ static inline void calc_load_exit_idle(void) { } #define cpu_relax_yield() cpu_relax() #endif -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - -static inline void sched_clock_tick(void) -{ -} - -static inline void clear_sched_clock_stable(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} - -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); - -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! # - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} -#endif - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} -#endif - extern unsigned long long task_sched_runtime(struct task_struct *task); @@ -2297,9 +2200,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - #ifdef CONFIG_HOTPLUG_CPU extern void idle_task_exit(void); #else diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 7cb7d79b2cf9..ac12f71d359c 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -3,4 +3,102 @@ #include +/* + * Do not use outside of architecture code which knows its limitations. + * + * sched_clock() has no promise of monotonicity or bounded drift between + * CPUs, use (which you should not) requires disabling IRQs. + * + * Please use one of the three interfaces below. + */ +extern unsigned long long notrace sched_clock(void); + +/* + * See the comment in kernel/sched/clock.c + */ +extern u64 running_clock(void); +extern u64 sched_clock_cpu(int cpu); + + +extern void sched_clock_init(void); + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void clear_sched_clock_stable(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} +#else +extern void sched_clock_init_late(void); +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +extern int sched_clock_stable(void); +extern void clear_sched_clock_stable(void); + +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} +#endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. + */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + #endif /* _LINUX_SCHED_CLOCK_H */ -- cgit v1.2.3 From 47913d4ebd99c827c82c4f29eb282a119c3f2aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:00:26 +0100 Subject: sched/headers, delayacct: Move the 'struct task_delay_info' definition from to The 'struct task_delay_info' definition does not have to be in sched.h, because task_struct only has a pointer to it. So move it to to reduce the size of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 39 ++++----------------------------------- 2 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 00e60f79a9cc..6b769cbd1000 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,7 +30,43 @@ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ + + u64 freepages_start; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ +}; +#endif +#include +#include + +#ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index b42d2f9d673a..7d6998858fa3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -893,39 +893,7 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ - - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ - - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. - */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ +struct task_delay_info; static inline int sched_info_on(void) { @@ -1612,9 +1580,10 @@ struct task_struct { struct page_frag task_frag; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif + #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif -- cgit v1.2.3 From e2d1e2aec572a2138dea74d53be54a1406d419c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:07:51 +0100 Subject: sched/headers: Move various ABI definitions to Move scheduler ABI types (struct sched_attr, struct sched_param, etc.) into the new UAPI header. This further reduces the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 70 ++------------------------------------------------- 1 file changed, 2 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d6998858fa3..5c481906e835 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,11 +5,6 @@ #include - -struct sched_param { - int sched_priority; -}; - #include /* for HZ */ #include @@ -64,69 +59,8 @@ struct sched_param { #include -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. - * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +struct sched_attr; +struct sched_param; struct futex_pi_state; struct robust_list_head; -- cgit v1.2.3 From dea38c74cb9205341f52b8d8ae18f61247a43ea8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:25:37 +0100 Subject: sched/headers: Move loadavg related definitions from to Move these bits to , to reduce the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 27 --------------------------- include/linux/sched/loadavg.h | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c481906e835..78adf7a0cac1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,31 +71,6 @@ struct blk_plug; struct filename; struct nameidata; -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<>= FSHIFT; - extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -106,8 +81,6 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); -extern void calc_global_load(unsigned long ticks); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index 3ae74be327e8..c392e28ce0ac 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -3,4 +3,31 @@ #include +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. + */ +extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<>= FSHIFT; + +extern void calc_global_load(unsigned long ticks); + #endif /* _LINUX_SCHED_LOADAVG_H */ -- cgit v1.2.3 From de8f1c77313d8c908b2897f268d466c13df161d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:41:24 +0100 Subject: sched/headers: Move autogroup APIs into Further reduce the size of sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 18 ------------------ include/linux/sched/autogroup.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78adf7a0cac1..042620729230 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2092,24 +2092,6 @@ static inline void wake_up_nohz_cpu(int cpu) { } extern u64 scheduler_tick_max_deferment(void); #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index d745f22e57dc..586bdf37330d 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -3,4 +3,25 @@ #include +struct signal_struct; +struct seq_file; + +#ifdef CONFIG_SCHED_AUTOGROUP +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#endif + #endif /* _LINUX_SCHED_AUTOGROUP_H */ -- cgit v1.2.3 From 27ddb689909cd0bab30524a5f720ae3a3e55acac Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:15 -0800 Subject: PCI: add an API to get node from vector Next patch will use the API to get the node from vector for nvme device Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Acked-by: Bjorn Helgaas Signed-off-by: Jens Axboe --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 282ed32244ce..eb3da1a04e6c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + return first_online_node; +} #endif static inline int -- cgit v1.2.3 From 6bae363ee3057a14eec93440826813603559273a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:10 -0500 Subject: blk-mq: Export blk_mq_freeze_queue_wait Drivers can start a freeze, so this provides a way to wait for frozen. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 001d30d727c5..8dacf680c851 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); -- cgit v1.2.3 From f91328c40a559362b6e7b7bfee01ca17fda87592 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:11 -0500 Subject: blk-mq: Provide freeze queue timeout A driver may wish to take corrective action if queued requests do not complete within a set time. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8dacf680c851..b296a9006117 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -246,6 +246,8 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); -- cgit v1.2.3 From 474c90156c8dcc2fa815e6716cc9394d7930cb9c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Mar 2017 12:17:22 -0800 Subject: give up on gcc ilog2() constant optimizations gcc-7 has an "optimization" pass that completely screws up, and generates the code expansion for the (impossible) case of calling ilog2() with a zero constant, even when the code gcc compiles does not actually have a zero constant. And we try to generate a compile-time error for anybody doing ilog2() on a constant where that doesn't make sense (be it zero or negative). So now gcc7 will fail the build due to our sanity checking, because it created that constant-zero case that didn't actually exist in the source code. There's a whole long discussion on the kernel mailing about how to work around this gcc bug. The gcc people themselevs have discussed their "feature" in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72785 but it's all water under the bridge, because while it looked at one point like it would be solved by the time gcc7 was released, that was not to be. So now we have to deal with this compiler braindamage. And the only simple approach seems to be to just delete the code that tries to warn about bad uses of ilog2(). So now "ilog2()" will just return 0 not just for the value 1, but for any non-positive value too. It's not like I can recall anybody having ever actually tried to use this function on any invalid value, but maybe the sanity check just meant that such code never made it out in public. Reported-by: Laura Abbott Cc: John Stultz , Cc: Thomas Gleixner Cc: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/log2.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index ef3d4f67118c..c373295f359f 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -15,12 +15,6 @@ #include #include -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ -- cgit v1.2.3 From 68e21be2916b359fd8afb536c1911dc014cfd03e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:08:20 +0100 Subject: sched/headers: Move task->mm handling methods to Move the following task->mm helper APIs into a new header file, , to further reduce the size and complexity of . Here are how the APIs are used in various kernel files: # mm_alloc(): arch/arm/mach-rpc/ecard.c fs/exec.c include/linux/sched/mm.h kernel/fork.c # __mmdrop(): arch/arc/include/asm/mmu_context.h include/linux/sched/mm.h kernel/fork.c # mmdrop(): arch/arm/mach-rpc/ecard.c arch/m68k/sun3/mmu_emu.c arch/x86/mm/tlb.c drivers/gpu/drm/amd/amdkfd/kfd_process.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/hw/hfi1/file_ops.c drivers/vfio/vfio_iommu_spapr_tce.c fs/exec.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/mmu_notifier.h include/linux/sched/mm.h kernel/fork.c kernel/futex.c kernel/sched/core.c mm/khugepaged.c mm/ksm.c mm/mmu_context.c mm/mmu_notifier.c mm/oom_kill.c virt/kvm/kvm_main.c # mmdrop_async_fn(): include/linux/sched/mm.h # mmdrop_async(): include/linux/sched/mm.h kernel/fork.c # mmget_not_zero(): fs/userfaultfd.c include/linux/sched/mm.h mm/oom_kill.c # mmput(): arch/arc/include/asm/mmu_context.h arch/arc/kernel/troubleshoot.c arch/frv/mm/mmu-context.c arch/powerpc/platforms/cell/spufs/context.c arch/sparc/include/asm/mmu_context_32.h drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/core/uverbs_main.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/exec.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/events/uprobes.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/oom_kill.c mm/process_vm_access.c mm/rmap.c mm/swapfile.c mm/util.c virt/kvm/async_pf.c # mmput_async(): include/linux/sched/mm.h kernel/fork.c mm/oom_kill.c # get_task_mm(): arch/arc/kernel/troubleshoot.c arch/powerpc/platforms/cell/spufs/context.c drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/util.c # mm_access(): fs/proc/base.c include/linux/sched/mm.h kernel/fork.c mm/process_vm_access.c # mm_release(): arch/arc/include/asm/mmu_context.h fs/exec.c include/linux/sched/mm.h include/uapi/linux/sched.h kernel/exit.c kernel/fork.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 95 ------------------------------------------------ include/linux/sched/mm.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 042620729230..d29bbe0ee41f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,101 +2379,6 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) return sp; } -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, struct task_struct *, unsigned long); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d32e3932b2e3..be1ae55f5ab9 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -5,4 +5,99 @@ #include #include +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 11701c6768367294c5086738d49196192aaf3d60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:21:47 +0100 Subject: sched/headers: Move task->mm coredumping related defines and methods from to This further reduces the size and complexity of . These are the definitions and APIs that are moved: # MMF_*: fs/binfmt_elf.c fs/binfmt_elf_fdpic.c fs/exec.c fs/proc/base.c include/linux/khugepaged.h include/linux/ksm.h include/linux/sched/coredump.h kernel/events/uprobes.c kernel/fork.c mm/huge_memory.c mm/khugepaged.c mm/ksm.c mm/memory.c mm/oom_kill.c # SUID_DUMP_*: arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c kernel/sysctl.c # get_dumpable(): arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c # set_dumpable(): fs/exec.c include/linux/sched/coredump.h kernel/cred.c kernel/sys.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 68 ----------------------------------------- include/linux/sched/coredump.h | 69 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d29bbe0ee41f..7934cd0acbc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -361,74 +361,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ab81e564e076..f46912aa4f4c 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -2,5 +2,74 @@ #define _LINUX_SCHED_COREDUMP_H #include +#include + +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + +/* mm flags */ + +/* for SUID_DUMP_* above */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) + +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From c3edc4010e9d102eb7b8f17d15c2ebc425fed63c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 08:35:14 +0100 Subject: sched/headers: Move task_struct::signal and task_struct::sighand types and accessors into MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task_struct::signal and task_struct::sighand are pointers, which would normally make it straightforward to not define those types in sched.h. That is not so, because the types are accompanied by a myriad of APIs (macros and inline functions) that dereference them. Split the types and the APIs out of sched.h and move them into a new header, . With this change sched.h does not know about 'struct signal' and 'struct sighand' anymore, trying to put accessors into sched.h as a test fails the following way: ./include/linux/sched.h: In function ‘test_signal_types’: ./include/linux/sched.h:2461:18: error: dereferencing pointer to incomplete type ‘struct signal_struct’ ^ This reduces the size and complexity of sched.h significantly. Update all headers and .c code that relied on getting the signal handling functionality from to include . The list of affected files in the preparatory patch was partly generated by grepping for the APIs, and partly by doing coverage build testing, both all[yes|mod|def|no]config builds on 64-bit and 32-bit x86, and an array of cross-architecture builds. Nevertheless some (trivial) build breakage is still expected related to rare Kconfig combinations and in-flight patches to various kernel code, but most of it should be handled by this patch. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 499 +----------------------------------------- include/linux/sched/signal.h | 502 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 505 insertions(+), 496 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7934cd0acbc7..c1586104d4c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,9 @@ struct blk_plug; struct filename; struct nameidata; +struct signal_struct; +struct sighand_struct; + extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -361,13 +364,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - struct pacct_struct { int ac_flag; long ac_exitcode; @@ -485,195 +481,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. - */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - /* * Some day this will be a full-fledged user tracking system.. */ @@ -2126,190 +1933,8 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. - */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, @@ -2338,10 +1963,8 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); extern void do_group_exit(int); @@ -2376,81 +1999,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also @@ -2471,25 +2019,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) @@ -2862,28 +2391,6 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c6958a53fef3..53fe5450f431 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,4 +8,506 @@ #include #include +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * NOTE! "signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. + */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, + const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(¤t->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. + */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* _LINUX_SCHED_SIGNAL_H */ -- cgit v1.2.3 From bcbb6a5bf7df6e37ba652d1f426eab042ec4f56b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:22:42 +0100 Subject: sched/headers: Move 'struct user_struct' definition and APIs to the new header 'struct user_struct' was added to sched.h historically, but it's actually entirely independent of task_struct and of scheduler details, so move it to its own header. Fix up .c files using those facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 53 --------------------------------------------- include/linux/sched/user.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c1586104d4c0..71efbcafaa31 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern void io_schedule(void); void __noreturn do_task_dead(void); struct nsproxy; -struct user_namespace; #ifdef CONFIG_MMU extern void arch_pick_mmap_layout(struct mm_struct *mm); @@ -481,49 +480,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; - -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(kuid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - - struct backing_dev_info; struct reclaim_state; @@ -1908,15 +1864,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - #include extern void xtime_update(unsigned long ticks); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 83238e5ddadd..40c6363da5ef 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -3,4 +3,58 @@ #include +struct key; + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif +#ifdef CONFIG_EPOLL + atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +#endif +#ifdef CONFIG_POSIX_MQUEUE + /* protected by mq_lock */ + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ +#endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + +#ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ + struct key *session_keyring; /* UID's default session keyring */ +#endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + kuid_t uid; + +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) + atomic_long_t locked_vm; +#endif +}; + +extern int uids_sysfs_init(void); + +extern struct user_struct *find_user(kuid_t); + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(kuid_t); +static inline struct user_struct *get_uid(struct user_struct *u) +{ + atomic_inc(&u->__count); + return u; +} +extern void free_uid(struct user_struct *); + #endif /* _LINUX_SCHED_USER_H */ -- cgit v1.2.3 From d151b27d3f86589308cd8c891fdfd2db5f8e80d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:17:23 +0100 Subject: sched/headers: Move softlockup detector watchdog methods to These methods don't belong into , they are neither directly related to task_struct or are scheduler functionality. Put them next to the other watchdog methods in . ( Arguably that header's name is a misnomer, and this patch makes it more so - but it should be renamed in another patch. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 37 ------------------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0a3fadc32693..aa3cd0878270 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,43 @@ #include #include +#ifdef CONFIG_LOCKUP_DETECTOR +extern void touch_softlockup_watchdog_sched(void); +extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); +extern void touch_all_softlockup_watchdogs(void); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; +void lockup_detector_init(void); +#else +static inline void touch_softlockup_watchdog_sched(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +static inline void touch_softlockup_watchdog_sync(void) +{ +} +static inline void touch_all_softlockup_watchdogs(void) +{ +} +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - diff --git a/include/linux/sched.h b/include/linux/sched.h index 71efbcafaa31..8a1d296c53a0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -286,43 +286,6 @@ extern int sched_cpu_dying(unsigned int cpu); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3 From 8d88460edc05224b8d7ae3372173153876a02825 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:06:10 +0100 Subject: sched/headers: Move 'struct pacct_struct' and 'struct cpu_itimer' form to These structures are actually part of 'struct signal', so move them to where they belong. This further decreases the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 ------------- include/linux/sched/signal.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a1d296c53a0..8bf111efe98e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -326,19 +326,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 53fe5450f431..30e7ceed3ef6 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -19,6 +19,22 @@ struct sighand_struct { wait_queue_head_t signalfd_wqh; }; +/* + * Per-process accounting stats: + */ +struct pacct_struct { + int ac_flag; + long ac_exitcode; + unsigned long ac_mem; + u64 ac_utime, ac_stime; + unsigned long ac_minflt, ac_majflt; +}; + +struct cpu_itimer { + u64 expires; + u64 incr; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always -- cgit v1.2.3 From 7284c6d419b5a6ae9806927f1fd4f0cfd19a16f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:14:19 +0100 Subject: sched/headers: Move the cpufreq interfaces to No need to have this in the generic header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ----------------- include/linux/sched/cpufreq.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8bf111efe98e..aae79706ec4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2288,21 +2288,4 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 07ed9664fa20..2bdcfbfc4c30 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -3,4 +3,25 @@ #include +/* + * Interface between cpufreq drivers and the scheduler: + */ + +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); +}; + +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); +void cpufreq_remove_update_util_hook(int cpu); +#endif /* CONFIG_CPU_FREQ */ + #endif /* _LINUX_SCHED_CPUFREQ_H */ -- cgit v1.2.3 From 4240c8bf877f1145571106a2934c5cea0b51b178 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:18:24 +0100 Subject: sched/headers: Move more mm_struct related functionality from to Neither the mmap_layout nor the mm_update_next_owner() methods need to be in - move them to the more appropriate header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 --------------------- include/linux/sched/mm.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index aae79706ec4f..1ddb82be6b50 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -313,19 +313,6 @@ void __noreturn do_task_dead(void); struct nsproxy; -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -2280,12 +2267,4 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - #endif diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index be1ae55f5ab9..93fad9f0f466 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -100,4 +100,25 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_MEMCG +extern void mm_update_next_owner(struct mm_struct *mm); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} +#endif /* CONFIG_MEMCG */ + +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern unsigned long +arch_get_unmapped_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); +extern unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From abe722a1c59728c6c0ea4e4d5efcfe397c8abebc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:27:56 +0100 Subject: sched/headers: Move the 'init_mm' declaration from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6daaf0a51968..28bc710d3467 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -546,6 +546,8 @@ struct mm_struct { struct work_struct async_put_work; }; +extern struct mm_struct init_mm; + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/sched.h b/include/linux/sched.h index 1ddb82be6b50..253d8ab6256c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1782,8 +1782,6 @@ static inline int kstack_end(void *addr) extern union thread_union init_thread_union; extern struct task_struct init_task; -extern struct mm_struct init_mm; - extern struct pid_namespace init_pid_ns; /* -- cgit v1.2.3 From d026ce796cbca3c49678a68bb4a39fb4b9cf8192 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:32:21 +0100 Subject: sched/headers: Move in_vfork() from to The in_vfork() function deals with task->mm, so it better belongs into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 26 -------------------------- include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 253d8ab6256c..aa60812b4b7a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1242,32 +1242,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; - - /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. - * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. - */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 93fad9f0f466..64d83fa8d93a 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -121,4 +121,30 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif +static inline bool in_vfork(struct task_struct *tsk) +{ + bool ret; + + /* + * need RCU to access ->real_parent if CLONE_VM was used along with + * CLONE_PARENT. + * + * We check real_parent->mm == tsk->mm because CLONE_VFORK does not + * imply CLONE_VM + * + * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus + * ->real_parent is not necessarily the task doing vfork(), so in + * theory we can't rely on task_lock() if we want to dereference it. + * + * And in this case we can't trust the real_parent->mm == tsk->mm + * check, it can be false negative. But we do not care, if init or + * another oom-unkillable task does this it should blame itself. + */ + rcu_read_lock(); + ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + rcu_read_unlock(); + + return ret; +} + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 5647028d550c959577527cec9c0f29fbcea029ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:39:17 +0100 Subject: sched/headers: Move the NUMA balancing interfaces from to Split out the interface between the scheduler and the MM which deals with page fault driven NUMA balancing, into the new header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ------------------------------- include/linux/sched/numa_balancing.h | 40 ++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index aa60812b4b7a..fcaea1e7b08a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1236,41 +1236,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 999182279f78..35d5fc77b4be 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -1,6 +1,46 @@ #ifndef _LINUX_SCHED_NUMA_BALANCING_H #define _LINUX_SCHED_NUMA_BALANCING_H +/* + * This is the interface between the scheduler and the MM that + * implements memory access pattern based NUMA-balancing: + */ + #include +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 + +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); +extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); +#else +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} +static inline void set_numabalancing_state(bool enabled) +{ +} +static inline void task_numa_free(struct task_struct *p) +{ +} +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} +#endif + #endif /* _LINUX_SCHED_NUMA_BALANCING_H */ -- cgit v1.2.3 From c41cfc6c5ba46050b416c0b0b2621cbe68c4669c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:45:58 +0100 Subject: sched/headers: Move the JOBCTL_ defines and methods from to Only a small fraction of sched.h users actually utilizes these defines, and they are not scheduler functionality in any case, so move them into their separate header. (Also make a self-contained header.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 30 ------------------------------ include/linux/sched/jobctl.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index fcaea1e7b08a..46ba8f1b5f1f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1549,36 +1549,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index 228e4644937b..016afa0fb3bb 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -1,4 +1,36 @@ #ifndef _LINUX_SCHED_JOBCTL_H #define _LINUX_SCHED_JOBCTL_H +#include + +struct task_struct; + +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask); + #endif /* _LINUX_SCHED_JOBCTL_H */ -- cgit v1.2.3 From 30a1baab81189f01c427c4939610b2dde2dbec37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: sched/headers: Remove various unrelated headers from Remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include because they are either not required, or are already included naturally as part of the remaining headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 46ba8f1b5f1f..7752025679c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,60 +5,32 @@ #include -#include /* for HZ */ - #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include #include -#include - -#include #include -#include #include #include #include -#include -#include #include #include -#include -#include #include -#include #include #include -#include -#include #include -#include #include #include #include #include #include -#include -#include #include #include #include #include -#include - struct sched_attr; struct sched_param; -- cgit v1.2.3 From 9a07000400c853c57477c2a5138ae9f556c40897 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:10:05 +0100 Subject: sched/headers: Move CONFIG_TASK_XACCT bits from to The CONFIG_TASK_XACCT=y accounting inline functions are only used by fs/read_write.c, so move them into their separate header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 38 -------------------------------------- include/linux/sched/xacct.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7752025679c0..1201312e111e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2104,44 +2104,6 @@ extern struct task_group root_task_group; extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h index 890f7ce5cd27..a28156a0d34a 100644 --- a/include/linux/sched/xacct.h +++ b/include/linux/sched/xacct.h @@ -1,6 +1,48 @@ #ifndef _LINUX_SCHED_XACCT_H #define _LINUX_SCHED_XACCT_H +/* + * Extended task accounting methods: + */ + #include +#ifdef CONFIG_TASK_XACCT +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.rchar += amt; +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.wchar += amt; +} + +static inline void inc_syscr(struct task_struct *tsk) +{ + tsk->ioac.syscr++; +} + +static inline void inc_syscw(struct task_struct *tsk) +{ + tsk->ioac.syscw++; +} +#else +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void inc_syscr(struct task_struct *tsk) +{ +} + +static inline void inc_syscw(struct task_struct *tsk) +{ +} +#endif + #endif /* _LINUX_SCHED_XACCT_H */ -- cgit v1.2.3 From 2a1f062a4acf0be50516ceece92a7182a173d55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: sched/headers: Move signal wakeup & sigpending methods from into This reduces the size of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 51 ------------------------------------------ include/linux/sched/signal.h | 53 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1201312e111e..1e0400069e95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1931,37 +1931,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -2028,26 +1997,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 30e7ceed3ef6..b3545fb4333a 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -3,10 +3,10 @@ #include #include -#include #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: @@ -271,6 +271,57 @@ extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + +static inline int signal_pending(struct task_struct *p) +{ + return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); +} + +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + +static inline int signal_pending_state(long state, struct task_struct *p) +{ + if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) + return 0; + if (!signal_pending(p)) + return 0; + + return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); +} + +/* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. + * This is required every time the blocked sigset_t changes. + * callers must hold sighand->siglock. + */ +extern void recalc_sigpending_and_wake(struct task_struct *t); +extern void recalc_sigpending(void); + +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} + #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on -- cgit v1.2.3 From 74444edaa0b2ef946e846d5ddf1ba90efcd7c200 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:43:54 +0100 Subject: sched/headers: Move the memalloc_noio_*() APIs to In preparation to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 22 ---------------------- include/linux/sched/mm.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e0400069e95..dbc3ff04750a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1468,28 +1468,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. - */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} - -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} - -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} - /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 64d83fa8d93a..62dca4e0b4e0 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -147,4 +147,26 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~(__GFP_IO | __GFP_FS); + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 3605df49556ebb7641d1f8ec2e7eeecf4dd734bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:12 +0100 Subject: sched/headers: Move task statistics APIs from to There are a number of task statistics related variables and methods exported via sched.h - collect them into and include it from their usage sites. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ---------- include/linux/sched/stat.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dbc3ff04750a..415baf253517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,16 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 30fe012aecb0..0d52ceb6d3ae 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,4 +3,22 @@ #include +/* + * Various counters maintained by the scheduler and fork(), + * exposed via /proc, sys.c or used by drivers via these APIs. + * + * ( Note that all these values are aquired without locking, + * so they can only be relied on in narrow circumstances. ) + */ + +extern unsigned long total_forks; +extern int nr_threads; +DECLARE_PER_CPU(unsigned long, process_counts); +extern int nr_processes(void); +extern unsigned long nr_running(void); +extern bool single_task_running(void); +extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(int cpu); +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); + #endif /* _LINUX_SCHED_STAT_H */ -- cgit v1.2.3 From 752b3ca734cbc17c0456b846ae886c0ed39c5703 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:12:59 +0100 Subject: sched/headers: Move the NOHZ APIs from to There's a number of NOHZ/dyntics related functionality in , but only a handful of timer files are making use of them. Move them into their own header. This better documents these APIs and unclutters . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ----------------------------------- include/linux/sched/nohz.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 415baf253517..5f9bfc603d19 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,14 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - extern void dump_cpu_task(int cpu); struct seq_file; @@ -204,15 +196,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - /* * Only dump TASK_* tasks. (0 for all tasks) */ @@ -1535,14 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - #ifndef cpu_relax_yield #define cpu_relax_yield() cpu_relax() #endif @@ -1563,16 +1538,6 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } -#endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index fe6caf0dab10..9471f0736a3a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -3,4 +3,43 @@ #include +/* + * This is the interface between the scheduler and nohz/dyntics: + */ + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); +#else +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void nohz_balance_enter_idle(int cpu); +extern void set_cpu_sd_state_idle(void); +extern int get_nohz_timer_target(void); +#else +static inline void nohz_balance_enter_idle(int cpu) { } +static inline void set_cpu_sd_state_idle(void) { } +#endif + +#ifdef CONFIG_NO_HZ_COMMON +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); +#else +static inline void wake_up_nohz_cpu(int cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern u64 scheduler_tick_max_deferment(void); +#endif + #endif /* _LINUX_SCHED_NOHZ_H */ -- cgit v1.2.3 From d3d1e320d43a7bad9603acf0214406a1e8795c63 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:16:09 +0100 Subject: sched/headers: Move debugging functions from to Collect the various scheduler and task state debugging APIs scattered around into the new header. In particular the show_regs() and show_stack() prototype affects many files, update them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 36 ----------------------------------- include/linux/sched/debug.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f9bfc603d19..76a2e522be29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,15 +46,9 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern void dump_cpu_task(int cpu); - struct seq_file; struct cfs_rq; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif /* * Task state bitmask. NOTE! These bits are also @@ -196,25 +190,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} - -extern void show_regs(struct pt_regs *); - -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); @@ -229,17 +204,6 @@ extern int sched_cpu_dying(unsigned int cpu); # define sched_cpu_dying NULL #endif -extern void sched_show_task(struct task_struct *p); - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) - -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; - -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 55bc0cd35fd5..853bbef0b47b 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -3,4 +3,50 @@ #include +/* + * Various scheduler/task debugging interfaces: + */ + +struct task_struct; + +extern void dump_cpu_task(int cpu); + +/* + * Only dump TASK_* tasks. (0 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(0); +} + +struct pt_regs; + +extern void show_regs(struct pt_regs *); + +/* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current + * task), SP is the stack pointer of the first frame that should be shown in the back + * trace (or NULL if the entire call-chain of the task should be shown). + */ +extern void show_stack(struct task_struct *task, unsigned long *sp); + +extern void sched_show_task(struct task_struct *p); + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_set_task(struct task_struct *p); +#endif + +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); + #endif /* _LINUX_SCHED_DEBUG_H */ -- cgit v1.2.3 From 63cc9d6fca8c220c2406f1376100a9acb55197af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:04:39 +0100 Subject: sched/headers, time/timekeeping: Move the xtime_update() prototype from to This was in only for hysterical raisins. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/time.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 76a2e522be29..5a4fbc76feb4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1579,8 +1579,6 @@ extern struct task_struct *find_task_by_pid_ns(pid_t nr, #include -extern void xtime_update(unsigned long ticks); - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5ce3090..4f4ab65b6e61 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,6 +167,8 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif +extern void xtime_update(unsigned long ticks); + struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- cgit v1.2.3 From 70b8157e61d0143fb44ae9482557d7aca365da3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:11:00 +0100 Subject: sched/headers: Move include from the middle of to the header portion Linux-0.01 already defined 'current' in the middle of sched.h, so this is an ancient historical precedent - but still in a modern kernel it looks a bit weird that we have: #include in the middle of the header. Move it further up. If this was done for some obscure dependency reasons then we'll trigger and document it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a4fbc76feb4..ac0fed4c3130 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,8 @@ #include #include +#include + struct sched_attr; struct sched_param; @@ -1577,8 +1579,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -#include - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -- cgit v1.2.3 From 0ca0156973a47e689f3bc817e26e15fff3f84eec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 14:52:01 +0100 Subject: sched/headers: Split hotplug CPU interfaces out of into Split the CPU hotplug scheduler APIs out of the common header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 --------------- include/linux/sched/hotplug.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ac0fed4c3130..25cc0adb3e08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -196,15 +196,6 @@ extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); @@ -1498,12 +1489,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 34670ed24894..c608d3c1ddb8 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -3,4 +3,24 @@ #include +/* + * Scheduler interfaces for hotplug CPU support: + */ + +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + +#ifdef CONFIG_HOTPLUG_CPU +extern void idle_task_exit(void); +#else +static inline void idle_task_exit(void) {} +#endif + #endif /* _LINUX_SCHED_HOTPLUG_H */ -- cgit v1.2.3 From 901b14bd946a8b7ea211105b6207e082ddd36846 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 15:24:12 +0100 Subject: sched/headers: Move task lifetime APIs from to There's a fair amount of task lifetime management (a.k.a fork()/exit()) related APIs in , but only a small fraction of the users of the generic sched.h header make use of them. Move these functions to the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 91 ------------------------------------------- include/linux/sched/task.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 25cc0adb3e08..b1677c8db03f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,28 +165,10 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -#include - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - struct task_struct; -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - extern void sched_init(void); extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); extern cpumask_var_t cpu_isolated_map; @@ -211,8 +193,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - struct nsproxy; /** @@ -1120,24 +1100,6 @@ struct task_struct { */ }; -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1429,21 +1391,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { @@ -1572,45 +1519,11 @@ extern void wake_up_new_task(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); - -extern void release_task(struct task_struct * p); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); - -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); -#else -static inline void exit_thread(struct task_struct *tsk) -{ -} -#endif extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern void do_group_exit(int); - extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); @@ -1618,10 +1531,6 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 0023c91ff821..e93638a03515 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -1,6 +1,103 @@ #ifndef _LINUX_SCHED_TASK_H #define _LINUX_SCHED_TASK_H +/* + * Interface between the scheduler and various task lifetime (fork()/exit()) + * functionality: + */ + #include +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t mmlist_lock; + +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + +extern asmlinkage void schedule_tail(struct task_struct *prev); +extern void init_idle(struct task_struct *idle, int cpu); +extern void init_idle_bootup_task(struct task_struct *idle); + +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; +#endif /* #ifdef CONFIG_TASKS_RCU */ +} + +extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_dead(struct task_struct *p); + +void __noreturn do_task_dead(void); + +extern void proc_caches_init(void); + +extern void release_task(struct task_struct * p); + +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. */ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif +extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif +extern void do_group_exit(int); + +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); +struct task_struct *fork_idle(int); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif + +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + #endif /* _LINUX_SCHED_TASK_H */ -- cgit v1.2.3 From 6bfbaa51ed47774492d83d182a86068cc35aa4c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:37:49 +0100 Subject: sched/headers, RCU: Move rcu_copy_process() from to kernel/fork.c Move rcu_copy_process() into kernel/fork.c, which is the only user of this inline function. This simplifies to the level that does not have to be included in it anymore - which change is done in a subsequent patch. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e93638a03515..20ed9108f261 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -25,21 +25,6 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); -- cgit v1.2.3 From c7af7877eeacfeaaf6a1b6f54c481292ef116837 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:01:58 +0100 Subject: sched/core: Move, sort and clean up structure predeclarations Most of the structure predeclarations were at the head of sched.h, but not all of them - there were a number of lines spread around sched.h, in random places. Move them to the head, and also sort them alphabetically. Remove unused entries. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 56 +++++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b1677c8db03f..5398356e33c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -33,24 +33,35 @@ #include -struct sched_attr; -struct sched_param; - -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations: */ +struct audit_context; +struct autogroup; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; +struct cfs_rq; struct filename; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -struct signal_struct; -struct sighand_struct; - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; +struct task_struct; +struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -165,8 +176,6 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -struct task_struct; - extern void sched_init(void); extern void sched_init_smp(void); @@ -193,8 +202,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -struct nsproxy; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -297,10 +304,6 @@ struct thread_group_cputimer { }; #include -struct autogroup; - -struct backing_dev_info; -struct reclaim_state; #ifdef CONFIG_SCHED_INFO struct sched_info { @@ -314,8 +317,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -struct task_delay_info; - static inline int sched_info_on(void) { #ifdef CONFIG_SCHEDSTATS @@ -342,20 +343,12 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -struct io_context; /* See blkdev.h */ - - #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; - struct load_weight { unsigned long weight; u32 inv_weight; @@ -564,7 +557,6 @@ union rcu_special { } b; /* Bits. */ u32 s; /* Set of bits. */ }; -struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, @@ -1125,8 +1117,6 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } -struct pid_namespace; - /* * the helpers to get the task's different pids as they are seen * from various namespaces -- cgit v1.2.3 From d04b0ad37e4b6ac39a56c823ae76ab37cd044dc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:07:57 +0100 Subject: sched/headers: Move the PREEMPT_COUNT defines from to These defines are not really part of the scheduler's driver API, but are related to the preempt count - so move them to . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 21 +++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea..cae461224948 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -55,6 +55,27 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + +/* + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. + * + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + */ +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET + +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 5398356e33c7..3b3e31da416e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -265,27 +265,6 @@ struct task_cputime_atomic { .sum_exec_runtime = ATOMIC64_INIT(0), \ } -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. - * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). - */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET - -/* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: - * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET - * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. -- cgit v1.2.3 From f3ac60671954c8d413532627b1be13a76f394c49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:59:33 +0100 Subject: sched/headers: Move task-stack related APIs from to Split out the task->stack related functionality, which is not really part of the core scheduler APIs. Only keep task_thread_info() because it's used by sched.h. Update the code that uses those facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 115 +++------------------------------------ include/linux/sched/task_stack.h | 104 +++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3e31da416e..af9590c8bfb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1450,6 +1450,15 @@ union thread_union { unsigned long stack[THREAD_SIZE/sizeof(long)]; }; +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +#elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) +#endif + #ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { @@ -1540,112 +1549,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -#ifdef CONFIG_THREAD_INFO_IN_TASK - -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} - -/* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - -#elif !defined(__HAVE_THREAD_FUNCTIONS) - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -/* - * Return the address of the last usable long on the stack. - * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. - * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. - */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} - -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? - task_stack_page(tsk) : NULL; -} - -extern void put_task_stack(struct task_struct *tsk); -#else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} -#endif - -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) -{ - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif -} -#endif -extern void set_task_stack_end_magic(struct task_struct *tsk); - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d2d578e85f7d..aaa5c2a6a0e9 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -1,7 +1,111 @@ #ifndef _LINUX_SCHED_TASK_STACK_H #define _LINUX_SCHED_TASK_STACK_H +/* + * task->stack (kernel stack) handling interfaces: + */ + #include #include +#ifdef CONFIG_THREAD_INFO_IN_TASK + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) + +#define task_stack_page(task) ((void *)(task)->stack) + +static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) +{ + *task_thread_info(p) = *task_thread_info(org); + task_thread_info(p)->task = p; +} + +/* + * Return the address of the last usable long on the stack. + * + * When the stack grows down, this is just above the thread + * info struct. Going any lower will corrupt the threadinfo. + * + * When the stack grows up, this is the highest address. + * Beyond that position, we corrupt data on the next page. + */ +static inline unsigned long *end_of_stack(struct task_struct *p) +{ +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; +#else + return (unsigned long *)(task_thread_info(p) + 1); +#endif +} + +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? + task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} +#endif + +#define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + +extern void thread_stack_cache_init(void); + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else + n++; +# endif + } while (!*n); + +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else + return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif +} +#endif +extern void set_task_stack_end_magic(struct task_struct *tsk); + #endif /* _LINUX_SCHED_TASK_STACK_H */ -- cgit v1.2.3 From 70806b21e4d64f01193a2b64a053b75ff53a2b10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: sched/headers: Move the 'root_task_group' declaration to Also remove the duplicate declaration from . ( That declaration was originally duplicated for dependency hell reasons, but there's no problem including the much smaller header now, to pick up the right prototype. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 -- include/linux/sched.h | 4 ---- include/linux/sched/autogroup.h | 5 +++++ 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f6841c19c913..91d9049f0039 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -151,8 +151,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, diff --git a/include/linux/sched.h b/include/linux/sched.h index af9590c8bfb0..4934733bd6cb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1707,10 +1707,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index 586bdf37330d..fd6855548d0c 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -4,6 +4,7 @@ #include struct signal_struct; +struct task_group; struct seq_file; #ifdef CONFIG_SCHED_AUTOGROUP @@ -24,4 +25,8 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif +#ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; +#endif /* CONFIG_CGROUP_SCHED */ + #endif /* _LINUX_SCHED_AUTOGROUP_H */ -- cgit v1.2.3 From 76960dbf9b235b957d9d730be2c6c2a70f709026 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:43:50 +0100 Subject: signals: Move signal data types from to Separate out just the pure data types - sched.h will be able to use this reduced size header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/signal.h | 54 ---------------------------------------- include/linux/signal_types.h | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index d1c2b05a7a55..94ad6eea9550 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -8,24 +8,6 @@ struct task_struct; /* for sysctl */ extern int print_fatal_signals; -/* - * Real Time signals may be queued. - */ - -struct sigqueue { - struct list_head list; - int flags; - siginfo_t info; - struct user_struct *user; -}; - -/* flags values. */ -#define SIGQUEUE_PREALLOC 1 - -struct sigpending { - struct list_head list; - sigset_t signal; -}; #ifndef HAVE_ARCH_COPY_SIGINFO @@ -271,42 +253,6 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct sigaction { -#ifndef __ARCH_HAS_IRIX_SIGACTION - __sighandler_t sa_handler; - unsigned long sa_flags; -#else - unsigned int sa_flags; - __sighandler_t sa_handler; -#endif -#ifdef __ARCH_HAS_SA_RESTORER - __sigrestore_t sa_restorer; -#endif - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -#ifdef __ARCH_HAS_KA_RESTORER - __sigrestore_t ka_restorer; -#endif -}; - -#ifdef CONFIG_OLD_SIGACTION -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; -#endif - -struct ksignal { - struct k_sigaction ka; - siginfo_t info; - int sig; -}; - extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 28799c88f490..16d862a3d8f3 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -1,7 +1,66 @@ #ifndef _LINUX_SIGNAL_TYPES_H #define _LINUX_SIGNAL_TYPES_H +/* + * Basic signal handling related data type definitions: + */ + #include #include +/* + * Real Time signals may be queued. + */ + +struct sigqueue { + struct list_head list; + int flags; + siginfo_t info; + struct user_struct *user; +}; + +/* flags values. */ +#define SIGQUEUE_PREALLOC 1 + +struct sigpending { + struct list_head list; + sigset_t signal; +}; + +struct sigaction { +#ifndef __ARCH_HAS_IRIX_SIGACTION + __sighandler_t sa_handler; + unsigned long sa_flags; +#else + unsigned int sa_flags; + __sighandler_t sa_handler; +#endif +#ifdef __ARCH_HAS_SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +#ifdef __ARCH_HAS_KA_RESTORER + __sigrestore_t ka_restorer; +#endif +}; + +#ifdef CONFIG_OLD_SIGACTION +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; +#endif + +struct ksignal { + struct k_sigaction ka; + siginfo_t info; + int sig; +}; + #endif /* _LINUX_SIGNAL_TYPES_H */ -- cgit v1.2.3 From 9e7d2e44dd88ba7e29c165b6fca428e384afa5a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: mm/headers, sched/headers: Move task related MM types from to Separate all the MM types that are embedded directly in 'struct task_struct' into the header. The goal is to include this header in , not the full header, to reduce the size, complexity and coupling of . (This patch does not change yet.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 49 -------------------------------------- include/linux/mm_types_task.h | 55 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 28bc710d3467..f60f45fe226f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,11 +24,6 @@ struct address_space; struct mem_cgroup; -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) -#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -231,17 +226,6 @@ struct page { #endif ; -struct page_frag { - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 offset; - __u32 size; -#else - __u16 offset; - __u16 size; -#endif -}; - #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) @@ -360,18 +344,6 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; -/* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u32 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - struct core_thread { struct task_struct *task; struct core_thread *next; @@ -383,27 +355,6 @@ struct core_state { struct completion startup; }; -enum { - MM_FILEPAGES, /* Resident file mapping pages */ - MM_ANONPAGES, /* Resident anonymous pages */ - MM_SWAPENTS, /* Anonymous swap entries */ - MM_SHMEMPAGES, /* Resident shared memory pages */ - NR_MM_COUNTERS -}; - -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 2419d302f03c..9526d8b9fe0e 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -1,10 +1,65 @@ #ifndef _LINUX_MM_TYPES_TASK_H #define _LINUX_MM_TYPES_TASK_H +/* + * Here are the definitions of the MM data types that are embedded in 'struct task_struct'. + * + * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) + */ + #include #include #include #include +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) + +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + +enum { + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) +#define SPLIT_RSS_COUNTING +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; + +struct page_frag { + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 offset; + __u32 size; +#else + __u16 offset; + __u16 size; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ -- cgit v1.2.3 From 77ba809e8b39b4e384df0433e2bd3dd0907dad29 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: sched/headers: Remove the dependency from Use the freshly introduced, reduced size header instead. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4934733bd6cb..3eb284741790 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From cdc75e9f7b14f29efcf4b162a3c673733e96db79 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: sched/headers: Move 'init_task' and 'init_thread_union' from to 'init_task' is really not part of core scheduler APIs but part of the fork() interface between the scheduler and process management. So move the declarations. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- include/linux/sched/task.h | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3eb284741790..4ab105a2a8f9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1469,9 +1469,6 @@ static inline int kstack_end(void *addr) } #endif -extern union thread_union init_thread_union; -extern struct task_struct init_task; - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 20ed9108f261..1be049a18d1b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -8,6 +8,9 @@ #include +struct task_struct; +union thread_union; + /* * This serializes "schedule()" and also protects * the run-queue from deletions/modifications (but @@ -17,6 +20,9 @@ extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; +extern union thread_union init_thread_union; +extern struct task_struct init_task; + #ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); #endif /* #ifdef CONFIG_PROVE_RCU */ -- cgit v1.2.3 From 56cd697366b6d6f67acb6c58ac7f3b185d11ef07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: sched/headers: Move the task_lock()/unlock() APIs to The task_lock()/task_unlock() APIs are not realated to core scheduling, they are task lifetime APIs, i.e. they belong into . Move them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 20 -------------------- include/linux/sched/task.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ab105a2a8f9..d481c129a822 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1526,26 +1526,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 1be049a18d1b..2be9fde588a7 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -91,4 +91,24 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif +/* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. And ->vfork_done. + * + * Nests both inside and outside of read_lock(&tasklist_lock). + * It must not be nested with write_lock_irq(&tasklist_lock), + * neither inside nor outside. + */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + #endif /* _LINUX_SCHED_TASK_H */ -- cgit v1.2.3 From 1050b27c52f615bc0e772b3119881e5304ccde6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: sched/headers: Move cputime functionality from and into Move cputime related functionality out of , as most code that includes does not use that functionality. Move data types that are not included in task_struct directly to the signal definitions, into . Also merge the (small) existing header into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cputime.h | 13 --- include/linux/sched.h | 89 --------------------- include/linux/sched/cputime.h | 182 +++++++++++++++++++++++++++++++++++++++++- include/linux/sched/signal.h | 33 ++++++++ 4 files changed, 214 insertions(+), 103 deletions(-) delete mode 100644 include/linux/cputime.h (limited to 'include/linux') diff --git a/include/linux/cputime.h b/include/linux/cputime.h deleted file mode 100644 index a691dc4ddc13..000000000000 --- a/include/linux/cputime.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_CPUTIME_H -#define __LINUX_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d481c129a822..2b43f55e4956 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -219,14 +219,6 @@ struct prev_cputime { #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -248,40 +240,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; - -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. - * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. - * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. - */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; -}; - #include #ifdef CONFIG_SCHED_INFO @@ -1234,44 +1192,6 @@ static inline void put_task_struct(struct task_struct *t) struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - /* * Per process flags */ @@ -1395,9 +1315,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -extern unsigned long long -task_sched_runtime(struct task_struct *task); - /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP extern void sched_exec(void); @@ -1629,12 +1546,6 @@ static __always_inline bool need_resched(void) return unlikely(tif_need_resched()); } -/* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - /* * Wrappers for p->thread_info->cpu access. No-op on UP. */ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6ed4fe43de28..4c5b9735c1ae 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -2,6 +2,186 @@ #define _LINUX_SCHED_CPUTIME_H #include -#include + +/* + * cputime accounting APIs: + */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include + +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime) +{ + *utime = t->utime; + *stime = t->stime; +} + +static inline u64 task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); +} +#endif + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); + + +/* + * Thread group CPU time accounting. + */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); + + +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +/** + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * + * @tsk: Pointer to target task. + */ +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + /* Check if cputimer isn't running. This is accessed without locking. */ + if (!READ_ONCE(cputimer->running)) + return NULL; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return NULL; + + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif + +/** + * account_group_user_time - Maintain utime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void account_group_user_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.utime); +} + +/** + * account_group_system_time - Maintain stime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void account_group_system_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.stime); +} + +/** + * account_group_exec_runtime - Maintain exec runtime for a thread group. + * + * @tsk: Pointer to task structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of the thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); +} + +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + +extern unsigned long long +task_sched_runtime(struct task_struct *task); #endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b3545fb4333a..2cf446704cd4 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -35,6 +35,39 @@ struct cpu_itimer { u64 incr; }; +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } +/** + * struct thread_group_cputimer - thread group interval timer counts + * @cputime_atomic: atomic thread group interval timers. + * @running: true when there are timers running and + * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU timer calculations. + */ +struct thread_group_cputimer { + struct task_cputime_atomic cputime_atomic; + bool running; + bool checking_timer; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always -- cgit v1.2.3 From a2d7a7463c38dd14b845721aac3583a26a97c66d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: sched/headers: Move sched_info_on() and force_schedstat_enabled() from to These APIs are not core scheduler but scheduler statistics related. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ---------------- include/linux/sched/stat.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b43f55e4956..707eee099332 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -254,22 +254,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); -#endif - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 0d52ceb6d3ae..d8cedf27083e 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -21,4 +21,20 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); +static inline int sched_info_on(void) +{ +#ifdef CONFIG_SCHEDSTATS + return 1; +#elif defined(CONFIG_TASK_DELAY_ACCT) + extern int delayacct_on; + return delayacct_on; +#else + return 0; +#endif +} + +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + #endif /* _LINUX_SCHED_STAT_H */ -- cgit v1.2.3 From 28851755990c832d7050d5e1e2c91ed9d9e6fe59 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:24:31 +0100 Subject: sched/headers, vfs/execve: Move the do_execve*() prototypes from to These are not scheduler related. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/binfmts.h | 10 ++++++++++ include/linux/sched.h | 8 -------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1303b570b18c..05488da3aee9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,6 +6,8 @@ #include #include +struct filename; + #define CORENAME_MAX_SIZE 128 /* @@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); +extern int do_execve(struct filename *, + const char __user * const __user *, + const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); + #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 707eee099332..5f2267e41fde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,14 +1400,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { -- cgit v1.2.3 From 9049863a32fad5d7158318e45f0a41a0f2192c0c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:31:22 +0100 Subject: sched/headers: Move kstack_end() from to This is a task-stack related API, not core scheduler functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ---------- include/linux/sched/task_stack.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f2267e41fde..309eb76b7eab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1360,16 +1360,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) -{ - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); -} -#endif - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index aaa5c2a6a0e9..df6ea6665b31 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -108,4 +108,14 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); +#ifndef __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + /* Reliable end of stack detection: + * Some APM bios versions misalign the stack + */ + return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); +} +#endif + #endif /* _LINUX_SCHED_TASK_STACK_H */ -- cgit v1.2.3 From 42011db0ed5a9c92b1281e1300eb3d026f3764a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:35:41 +0100 Subject: sched/headers: Move exit_files() and exit_itimers() from to These two functions are task management related, not core scheduler APIs. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ---- include/linux/sched/task.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 309eb76b7eab..4a03c49e3bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1386,10 +1386,6 @@ extern void wake_up_new_task(struct task_struct *tsk); static inline void kick_process(struct task_struct *tsk) { } #endif -extern void exit_files(struct task_struct *); - -extern void exit_itimers(struct signal_struct *); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2be9fde588a7..a33a8121da9a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -67,6 +67,9 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void do_group_exit(int); +extern void exit_files(struct task_struct *); +extern void exit_itimers(struct signal_struct *); + extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); -- cgit v1.2.3 From c5a21921d55a2aee9d1e031a78cef6cda3eab78b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: sched/headers: Move _init() prototypes from to trap_init() and cpu_init() belong into , sched_init*() into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- include/linux/sched/init.h | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a03c49e3bcc..0fb56ae0abc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,15 +176,10 @@ struct uts_namespace; /* Task command name length */ #define TASK_COMM_LEN 16 -extern void sched_init(void); -extern void sched_init_smp(void); - extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void cpu_init (void); -extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index 15e46313e55e..f31d16075857 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -3,4 +3,11 @@ #include +/* + * Scheduler init related prototypes: + */ + +extern void sched_init(void); +extern void sched_init_smp(void); + #endif /* _LINUX_SCHED_INIT_H */ -- cgit v1.2.3 From 93b5a9a7051e51ce50109046af0235268a261ba0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:53:36 +0100 Subject: sched/headers, timekeeping: Move the timer tick function prototypes to Move the update_process_times() and xtime_update() prototypes to . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - include/linux/time.h | 2 -- include/linux/timekeeping.h | 4 ++++ 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0fb56ae0abc7..dc8d94dc140f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -180,7 +180,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void update_process_times(int user); extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/time.h b/include/linux/time.h index 4f4ab65b6e61..23f0f5ce3090 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,8 +167,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif -extern void xtime_update(unsigned long ticks); - struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d2e804e15c3e..b598cbc7b576 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -8,6 +8,10 @@ void timekeeping_init(void); extern int timekeeping_suspended; +/* Architecture timer tick functions: */ +extern void update_process_times(int user); +extern void xtime_update(unsigned long ticks); + /* * Get and set timeofday */ -- cgit v1.2.3 From dcc2dc45f7cf267d37843059ff75b70260596c69 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:05:49 +0100 Subject: sched/headers, mm: Move 'struct tlbflush_unmap_batch' from to Unclutter some more. Also move the CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH condition inside the structure body definition, to remove a pair of #ifdefs from sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types_task.h | 22 ++++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 9526d8b9fe0e..136dfdf63ba1 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -62,4 +63,25 @@ struct page_frag { #endif }; +/* Track pages that require TLB flushes */ +struct tlbflush_unmap_batch { +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed. See set_tlb_ubc_flush_pending(). + */ + struct cpumask cpumask; + + /* True if any bit in cpumask is set */ + bool flush_required; + + /* + * If true then the PTE was dirty when unmapped. The entry must be + * flushed before IO is initiated or a stale TLB entry potentially + * allows an update without redirtying the page. + */ + bool writable; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8d94dc140f..8d6b7aa7f3ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,25 +484,6 @@ struct wake_q_node { struct wake_q_node *next; }; -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; -}; - struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -895,9 +876,7 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH struct tlbflush_unmap_batch tlb_ubc; -#endif struct rcu_head rcu; -- cgit v1.2.3 From cda66725c1444db67127115d611c982b62b45d8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:30:50 +0100 Subject: sched/headers: Move the get_task_struct()/put_task_struct() and related APIs from to These belong into the task lifetime API header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 -------------- include/linux/sched/task.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d6b7aa7f3ac..b68358c60560 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1135,20 +1135,6 @@ static inline int is_global_init(struct task_struct *tsk) extern struct pid *cad_pid; -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - /* * Per process flags */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a33a8121da9a..3886ae64148f 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,6 +75,21 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern void free_task(struct task_struct *tsk); + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; -- cgit v1.2.3 From 6f175fc9536355d8aa5c2d4854848a97c244a031 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:12:45 +0100 Subject: sched/headers: Move the sched_exec() prototype to sched_exec() better fits into the task lifetime APIs than into the core scheduler APIs. This reduces the size of a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/task.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b68358c60560..bd89fc17767c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1258,13 +1258,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 3886ae64148f..a978d7189cfd 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -77,6 +77,13 @@ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void free_task(struct task_struct *tsk); +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct(struct task_struct *t); -- cgit v1.2.3 From c03cb28e7c44055cd00d11b8581b9fa46a3e3764 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index b3550b36e65d..0d6fceff37bb 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H -#include - #include /* -- cgit v1.2.3 From f411229e1dd623a3dee623824d094546789977e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:49:05 +0100 Subject: sched/headers: Remove tsk_is_polling() It's not used by anything. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/idle.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 66ee32f87f0b..5ca63ebad6b4 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -17,10 +17,6 @@ extern void wake_up_if_idle(int cpu); * polling state. */ #ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} static inline void __current_set_polling(void) { @@ -59,7 +55,6 @@ static inline bool __must_check current_clr_polling_and_test(void) } #else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } static inline void __current_set_polling(void) { } static inline void __current_clr_polling(void) { } -- cgit v1.2.3 From ea94763950e49d1aa622c59218ee2fc3b434ba6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index ac12f71d359c..4a68c6791207 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CLOCK_H #define _LINUX_SCHED_CLOCK_H -#include +#include /* * Do not use outside of architecture code which knows its limitations. -- cgit v1.2.3 From cc689c5b352d4a7510b11c975c5c94d4785b37e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Remove and from The file is a self-contained header and users of it either don't need and - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 2 -- include/linux/fault-inject.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6b769cbd1000..4178d2493547 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,8 +18,6 @@ #define _LINUX_DELAYACCT_H #include -#include -#include /* * Per-task flags relevant to delay accounting diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f4956d8601c..728d4e0292aa 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ +struct kmem_cache; + #ifdef CONFIG_FAILSLAB extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -- cgit v1.2.3 From 5a2d6880f461faa416c0d329d46a128cf342c1eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/loadavg.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index c392e28ce0ac..4264bc6b2c27 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_LOADAVG_H #define _LINUX_SCHED_LOADAVG_H -#include - /* * These are the constant used to fake the fixed-point load-average * counting. Some notes: -- cgit v1.2.3 From 5fd73157bdfb57370b69be7c0067f08e610e7daa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. Add a 'task_struct' predeclaration to make it build standalone. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/autogroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index fd6855548d0c..55cd496df884 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SCHED_AUTOGROUP_H #define _LINUX_SCHED_AUTOGROUP_H -#include - struct signal_struct; +struct task_struct; struct task_group; struct seq_file; -- cgit v1.2.3 From b8d6d80b37a98e3910f1b4e799f8ebea88e94bfa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include kernel.h and atomic.h. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 62dca4e0b4e0..830953ebb391 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H +#include +#include #include #include #include -- cgit v1.2.3 From ae1cc8823204bb938d039afa43c28a84591ada9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include . This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/coredump.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index f46912aa4f4c..69eedcef8f03 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -1,7 +1,6 @@ #ifndef _LINUX_SCHED_COREDUMP_H #define _LINUX_SCHED_COREDUMP_H -#include #include #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -- cgit v1.2.3 From 556725839e543bc2b45bd73121c0b1c1d6c23714 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:31:24 +0100 Subject: sched/headers: Remove unused 'task_can_switch_user()' prototype The function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bd89fc17767c..43ed34ccb53a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1509,9 +1509,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -- cgit v1.2.3 From de8deb0ad79f8a49cea5110c006c8676a11611f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: sched/headers: Remove from If we add then becomes a self-contained header and users of it either don't need or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/user.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 40c6363da5ef..5d5415e129d4 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -1,7 +1,8 @@ #ifndef _LINUX_SCHED_USER_H #define _LINUX_SCHED_USER_H -#include +#include +#include struct key; -- cgit v1.2.3 From ac4e620967bb72f86371154935aa8b67cf54e225 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:44:31 +0100 Subject: sched/headers: Remove #include from The header does not actually make use of any types or APIs defined in , so remove its inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 43ed34ccb53a..54eefb2ad603 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,7 +5,6 @@ #include -#include #include #include #include -- cgit v1.2.3 From aa829c7679a13fca667f772db1f0ea03adc29122 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:54:05 +0100 Subject: sched/headers: Remove from It's not used by anything in anymore. This reduces the preprocessed size of and speeds up the build a bit. Also fix code that implicitly relied on headers included by . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 54eefb2ad603..8991e4d7cd0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,7 +28,6 @@ #include #include #include -#include #include -- cgit v1.2.3 From ed53742d793bd92ed32114081370b199dc94467d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: sched/headers: Remove from Make the file a self-contained header and remove the dependency: users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 2bdcfbfc4c30..d2be2ccbb372 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CPUFREQ_H #define _LINUX_SCHED_CPUFREQ_H -#include +#include /* * Interface between cpufreq drivers and the scheduler: -- cgit v1.2.3 From 71af2ed5eeea639339e3a1497a0196bab7de4b57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: kasan, sched/headers: Remove from is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7793036eac80..ceb3fe78a0d3 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,7 +1,6 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H -#include #include struct kmem_cache; -- cgit v1.2.3 From e26512fea5bcd6602dbf02a551ed073cd4529449 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: sched/headers: Remove inclusion from This reduces header dependencies and speeds up the build. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8991e4d7cd0f..1be69735cef3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From f780d89a0e820a529cf91fb78b52565e1b37b774 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:03:42 +0100 Subject: sched/headers: Remove from This reduces header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 1be69735cef3..9dfa9c113570 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 9c6da18109d603a99915b257929c0370c9d3ae40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: sched/headers: Remove from This reduces header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9dfa9c113570..cc5c99a27e21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From f0a0eb699967f4f51567b563818bafe4017bef73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: sched/headers: Remove the include from This reduces sched.h header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index cc5c99a27e21..ab80596dddfd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From dc1995392d79b0eeee147a792482b991f74c1494 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. ( Keep the dependency.) This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index d8cedf27083e..141b74c53fad 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_STAT_H #define _LINUX_SCHED_STAT_H -#include +#include /* * Various counters maintained by the scheduler and fork(), -- cgit v1.2.3 From b0145d9b332a5117bd0abbb980ab89586a1d5f6c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 9471f0736a3a..4995b717500b 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_NOHZ_H #define _LINUX_SCHED_NOHZ_H -#include - /* * This is the interface between the scheduler and nohz/dyntics: */ -- cgit v1.2.3 From 4f079e98a0db5f067c0981a526ff8938e21c52e2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/debug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 853bbef0b47b..e0eaee54c5a4 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_DEBUG_H #define _LINUX_SCHED_DEBUG_H -#include - /* * Various scheduler/task debugging interfaces: */ -- cgit v1.2.3 From a906086ef35129d522047a296e7c4237af75fdcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/hotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index c608d3c1ddb8..752ac7e628d7 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_HOTPLUG_H #define _LINUX_SCHED_HOTPLUG_H -#include - /* * Scheduler interfaces for hotplug CPU support: */ -- cgit v1.2.3 From fae3c30c2d1ae3ff93877f64e5d55e2443d8f82f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 12:00:08 +0100 Subject: sched/headers: Remove the runqueue_is_locked() prototype Remove the runqueue_is_locked() prototype, the function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ab80596dddfd..ac98255d00fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -172,8 +172,6 @@ struct uts_namespace; extern cpumask_var_t cpu_isolated_map; -extern int runqueue_is_locked(int cpu); - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX -- cgit v1.2.3 From cd9c513be34ceaae8bf211474b91b6897574efdd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:58 +0100 Subject: sched/headers: Remove from This is a stray header that is not needed by anything in sched.h, so remove it. Update files that relied on the stray inclusion. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/user_namespace.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ac98255d00fb..b361f881fe44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -226,8 +226,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#include - #ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 08264641b502..faa9bfb827da 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 192c9414516e7178a44f9ed48d47501c4c19771c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: sched/headers: Remove from Instead of including the full , only include the types-only header in , to further decouple the scheduler header from the signal headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b361f881fe44..905d4f148d65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From 1d1954e038435765a623884b626731784cde768f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:16:15 +0100 Subject: sched/headers: Remove the 'init_pid_ns' prototype from pid.h already defines it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 905d4f148d65..057b2a85ecc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,8 +1301,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -extern struct pid_namespace init_pid_ns; - /* * find a task by one of its numerical ids * -- cgit v1.2.3 From b68070e146b9c2b4ece8d869a4fab9a4f14bbfb4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: sched/headers: Remove from We don't actually need the full rculist.h header anymore, include the smaller rcupdate.h header instead. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 057b2a85ecc7..5720d11f3224 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 2c873d55cd838deef8218b6d5fe9bd336cb3113a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:11:03 +0100 Subject: sched/core: Remove unused prefetch_stack() prefetch_stack() is defined by IA64, but not actually used anywhere anymore. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5720d11f3224..bd0111a06aa2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -247,12 +247,6 @@ struct sched_info { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - struct load_weight { unsigned long weight; u32 inv_weight; -- cgit v1.2.3 From 5c0d0f36414f9f8a292b42e797f9284b127d79c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:59 +0100 Subject: sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/init.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index f31d16075857..127215045285 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_INIT_H #define _LINUX_SCHED_INIT_H -#include - /* * Scheduler init related prototypes: */ -- cgit v1.2.3 From 50ff9d130014f7b19541dbf607a615a72b75b778 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: sched/headers: Remove from It's not used by any of the scheduler methods, but needs it to pick up STACK_END_MAGIC. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bd0111a06aa2..559be4f1aee5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include -- cgit v1.2.3 From b2d5bfea2d00a0000da18f7667c2b0e2c2f168d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:56:40 +0100 Subject: sched/headers, timers: Remove the include from So we want to simplify 's header dependencies, but one roadblock of that is 's inclusion of sysctl.h, which brings in other, problematic headers. Note that timer.h's inclusion of sysctl.h can be avoided if we pre-declare ctl_table - so do that. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timer.h | 2 +- include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index c7bdf895179c..e6789b8757d5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -212,7 +212,7 @@ struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -#include +struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index faa9bfb827da..be765234c0a2 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 283cb90305cf1686594ed537c7a8cb188eba1a4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:59:47 +0100 Subject: sched/headers, hrtimer: Remove the include from In our quest to simplify 's header dependencies, remove the inclusion from - which does not appear to be necessary, as hrtimer.h does not use waitqueues. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e52b427223ba..249e579ecd4c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From ee6a3d19f15b9b10075481088b8d4537f286d7b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:01:09 +0100 Subject: sched/headers: Remove the include from It's used only by a single (rarely used) inline function (task_node(p)), which we can move to . ( Add , because we rely on that. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 +------ include/linux/sched/topology.h | 7 +++++++ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 559be4f1aee5..f9dc9cfaf079 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include @@ -1454,11 +1454,6 @@ static inline unsigned int task_cpu(const struct task_struct *p) #endif } -static inline int task_node(const struct task_struct *p) -{ - return cpu_to_node(task_cpu(p)); -} - extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 0d6fceff37bb..7d065abc7a47 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H +#include + #include /* @@ -216,4 +218,9 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + #endif /* _LINUX_SCHED_TOPOLOGY_H */ -- cgit v1.2.3 From 7f5f8e8d97d77edf33f2836259d1f19c6f4d94f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:44:12 +0100 Subject: sched/headers: Remove #ifdefs from We can remove two pairs of #ifdefs by defining structures in a smarter way. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f9dc9cfaf079..341f5792fbe0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -224,8 +224,8 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#ifdef CONFIG_SCHED_INFO struct sched_info { +#ifdef CONFIG_SCHED_INFO /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ unsigned long long run_delay; /* time spent waiting on a runqueue */ @@ -233,8 +233,8 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -}; #endif /* CONFIG_SCHED_INFO */ +}; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair @@ -309,8 +309,8 @@ struct sched_avg { unsigned long load_avg, util_avg; }; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics { +#ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; @@ -342,8 +342,8 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; -}; #endif +}; struct sched_entity { struct load_weight load; /* for load-balancing */ @@ -358,9 +358,7 @@ struct sched_entity { u64 nr_migrations; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics statistics; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED int depth; @@ -530,9 +528,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; -#endif struct list_head tasks; #ifdef CONFIG_SMP -- cgit v1.2.3 From 5eca1c10cbaa9c366c18ca79f81f21c731e3dcc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 22:06:35 +0100 Subject: sched/headers: Clean up Now that dependencies have been sorted out, do various trivial cleanups: - remove unnecessary structure predeclarations - fix various typos - update comments where necessary - remove pointless comments - use consistent types - tabulate consistently - use a consistent comment style - clean up the header section a bit - use a consistent style of a single field per line - remove line-breaks where they make the code look worse - etc ... No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1222 ++++++++++++++++++++++++++----------------------- 1 file changed, 651 insertions(+), 571 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 341f5792fbe0..d67eee84fd43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,38 +1,38 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include +/* + * Define 'struct task_struct' and provide the main scheduler + * APIs (schedule(), wakeup variants, etc.) + */ -#include -#include +#include -#include -#include -#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include #include +#include #include - #include -#include -#include -#include #include +#include +#include +#include +#include -#include - -/* task_struct member predeclarations: */ +/* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -struct autogroup; struct backing_dev_info; struct bio_list; struct blk_plug; struct cfs_rq; -struct filename; struct fs_struct; struct futex_pi_state; struct io_context; @@ -52,8 +52,6 @@ struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; -struct task_struct; -struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -65,50 +63,53 @@ struct uts_namespace; * modifying one set can't modify the other one by * mistake. */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 -#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 - -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - -/* Convenience macros for the sake of set_current_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) + +/* Used in tsk->state: */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* Used in tsk->exit_state: */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* Used in tsk->state again: */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" + +/* Convenience macros for the sake of set_current_state: */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up(): */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state(): */ +#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) + +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) + +#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -158,26 +159,24 @@ struct uts_namespace; * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - +#define __set_current_state(state_value) do { current->state = (state_value); } while (0) +#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) #endif -/* Task command name length */ -#define TASK_COMM_LEN 16 +/* Task command name length: */ +#define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; +extern cpumask_var_t cpu_isolated_map; extern void scheduler_tick(void); -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -extern signed long schedule_timeout_idle(signed long timeout); +#define MAX_SCHEDULE_TIMEOUT LONG_MAX + +extern long schedule_timeout(long timeout); +extern long schedule_timeout_interruptible(long timeout); +extern long schedule_timeout_killable(long timeout); +extern long schedule_timeout_uninterruptible(long timeout); +extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); @@ -197,9 +196,9 @@ extern void io_schedule(void); */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - u64 utime; - u64 stime; - raw_spinlock_t lock; + u64 utime; + u64 stime; + raw_spinlock_t lock; #endif }; @@ -214,25 +213,34 @@ struct prev_cputime { * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; }; -/* Alternate field names when used to cache expirations. */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime +/* Alternate field names when used on cache expirations: */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime struct sched_info { #ifdef CONFIG_SCHED_INFO - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ + /* Cumulative counters: */ + + /* # of times we have run on this CPU: */ + unsigned long pcount; + + /* Time spent waiting on a runqueue: */ + unsigned long long run_delay; + + /* Timestamps: */ + + /* When did we last run on a CPU? */ + unsigned long long last_arrival; + + /* When were we last queued to run? */ + unsigned long long last_queued; - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ #endif /* CONFIG_SCHED_INFO */ }; @@ -243,12 +251,12 @@ struct sched_info { * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) struct load_weight { - unsigned long weight; - u32 inv_weight; + unsigned long weight; + u32 inv_weight; }; /* @@ -304,69 +312,73 @@ struct load_weight { * issues. */ struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + u64 last_update_time; + u64 load_sum; + u32 util_sum; + u32 period_contrib; + unsigned long load_avg; + unsigned long util_avg; }; struct sched_statistics { #ifdef CONFIG_SCHEDSTATS - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; + + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif }; struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - u64 nr_migrations; + u64 nr_migrations; - struct sched_statistics statistics; + struct sched_statistics statistics; #ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; + int depth; + struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; + struct cfs_rq *my_q; #endif #ifdef CONFIG_SMP @@ -376,49 +388,49 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. */ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; + + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; + struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; + struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct rt_rq *my_q; #endif }; struct sched_dl_entity { - struct rb_node rb_node; + struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_runtime; /* Maximum runtime for each instance */ + u64 dl_deadline; /* Relative deadline of each instance */ + u64 dl_period; /* Separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ + s64 runtime; /* Remaining runtime for this instance */ + u64 deadline; /* Absolute deadline for this instance */ + unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: @@ -431,24 +443,28 @@ struct sched_dl_entity { * outside bandwidth enforcement mechanism (but only until we * exit the critical section); * - * @dl_yielded tells if task gave up the cpu before consuming + * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled; + int dl_boosted; + int dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ - struct hrtimer dl_timer; + struct hrtimer dl_timer; }; union rcu_special { struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + + /* Otherwise the compiler can store garbage here: */ + u8 pad; } b; /* Bits. */ u32 s; /* Set of bits. */ }; @@ -470,361 +486,417 @@ struct task_struct { * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. */ - struct thread_info thread_info; + struct thread_info thread_info; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + void *stack; + atomic_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; + unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; + struct llist_node wake_entry; + int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ + /* Current CPU: */ + unsigned int cpu; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; - int wake_cpu; + int wake_cpu; #endif - int on_rq; + int on_rq; + + int prio; + int static_prio; + int normal_prio; + unsigned int rt_priority; - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; + struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; + struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; + /* List of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; + unsigned int btrace_seq; #endif - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ - struct sched_info sched_info; + struct sched_info sched_info; - struct list_head tasks; + struct list_head tasks; #ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; #endif - struct mm_struct *mm, *active_mm; + struct mm_struct *mm; + struct mm_struct *active_mm; /* Per-thread vma caching: */ - struct vmacache vmacache; - -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; + struct vmacache vmacache; + +#ifdef SPLIT_RSS_COUNTING + struct task_rss_stat rss_stat; +#endif + int exit_state; + int exit_code; + int exit_signal; + /* The signal sent when the parent dies: */ + int pdeath_signal; + /* JOBCTL_*, siglock protected: */ + unsigned long jobctl; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; + + /* Scheduler bits, serialized by scheduler locks: */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + /* Force alignment to the next boundary: */ + unsigned :0; + + /* Unserialized, strictly 'current' */ + + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; + unsigned in_iowait:1; +#ifndef TIF_RESTORE_SIGMASK + unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; + unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; + unsigned memcg_kmem_skip_account:1; #endif #endif #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; + unsigned brk_randomized:1; #endif - unsigned long atomic_flags; /* Flags needing atomic access. */ + unsigned long atomic_flags; /* Flags requiring atomic access. */ - struct restart_block restart_block; + struct restart_block restart_block; - pid_t pid; - pid_t tgid; + pid_t pid; + pid_t tgid; #ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; + /* Canary value for the -fstack-protector GCC feature: */ + unsigned long stack_canary; #endif /* - * pointers to (original) parent process, youngest child, younger sibling, + * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + + /* Real parent process: */ + struct task_struct __rcu *real_parent; + + /* Recipient of SIGCHLD, wait4() reports: */ + struct task_struct __rcu *parent; + /* - * children/sibling forms the list of my natural children + * Children/sibling form the list of natural children: */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; /* - * ptraced is the list of tasks this task is using ptrace on. + * 'ptraced' is the list of tasks this task is using ptrace() on. + * * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. + * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ - struct list_head ptraced; - struct list_head ptrace_entry; + struct list_head ptraced; + struct list_head ptrace_entry; /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; + + struct completion *vfork_done; - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + /* CLONE_CHILD_SETTID: */ + int __user *set_child_tid; - u64 utime, stime; + /* CLONE_CHILD_CLEARTID: */ + int __user *clear_child_tid; + + u64 utime; + u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; + u64 utimescaled; + u64 stimescaled; #endif - u64 gtime; - struct prev_cputime prev_cputime; + u64 gtime; + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ + /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ + /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ + /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, } vtime_snap_whence; #endif #ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; + atomic_t tick_dep_mask; #endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; + /* Context switch counts: */ + unsigned long nvcsw; + unsigned long nivcsw; + + /* Monotonic time in nsecs: */ + u64 start_time; + + /* Boot based time in nsecs: */ + u64 real_start_time; + + /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ + unsigned long min_flt; + unsigned long maj_flt; #ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif + + /* Process credentials: */ + + /* Tracer's credentials at attach: */ + const struct cred __rcu *ptracer_cred; + + /* Objective and real subjective task credentials (COW): */ + const struct cred __rcu *real_cred; + + /* Effective (overridable) subjective task credentials (COW): */ + const struct cred __rcu *cred; + + /* + * executable name, excluding path. + * + * - normally initialized setup_new_exec() + * - access it with [gs]et_task_comm() + * - lock it with task_lock() + */ + char comm[TASK_COMM_LEN]; + + struct nameidata *nameidata; + #ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; + unsigned long last_switch_count; +#endif + /* Filesystem information: */ + struct fs_struct *fs; + + /* Open file information: */ + struct files_struct *files; + + /* Namespaces: */ + struct nsproxy *nsproxy; + + /* Signal handlers: */ + struct signal_struct *signal; + struct sighand_struct *sighand; + sigset_t blocked; + sigset_t real_blocked; + /* Restored if set_restore_sigmask() was used: */ + sigset_t saved_sigmask; + struct sigpending pending; + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned int sas_ss_flags; + + struct callback_head *task_works; + + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; + kuid_t loginuid; + unsigned int sessionid; #endif - struct seccomp seccomp; + struct seccomp seccomp; -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; + /* Thread group tracking: */ + u32 parent_exec_id; + u32 self_exec_id; + + /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ + spinlock_t alloc_lock; /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; + raw_spinlock_t pi_lock; - struct wake_q_node wake_q; + struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; + /* PI waiters blocked on a rt_mutex held by this task: */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling: */ + struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; + /* Mutex deadlock detection: */ + struct mutex_waiter *blocked_on; #endif + #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; #endif + #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; #endif + #ifdef CONFIG_UBSAN - unsigned int in_ubsan; + unsigned int in_ubsan; #endif -/* journalling filesystem info */ - void *journal_info; + /* Journalling filesystem info: */ + void *journal_info; -/* stacked block device info */ - struct bio_list *bio_list; + /* Stacked block device info: */ + struct bio_list *bio_list; #ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; + /* Stack plugging: */ + struct blk_plug *plug; #endif -/* VM state */ - struct reclaim_state *reclaim_state; + /* VM state: */ + struct reclaim_state *reclaim_state; + + struct backing_dev_info *backing_dev_info; - struct backing_dev_info *backing_dev_info; + struct io_context *io_context; - struct io_context *io_context; + /* Ptrace state: */ + unsigned long ptrace_message; + siginfo_t *last_siginfo; - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ + struct task_io_accounting ioac; +#ifdef CONFIG_TASK_XACCT + /* Accumulated RSS usage: */ + u64 acct_rss_mem1; + /* Accumulated virtual memory usage: */ + u64 acct_vm_mem1; + /* stime + utime since last update: */ + u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; + /* Protected by ->alloc_lock: */ + nodemask_t mems_allowed; + /* Seqence number to catch updates: */ + seqcount_t mems_allowed_seq; + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; + /* Control Group info protected by css_set_lock: */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock: */ + struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT_A - int closid; + int closid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; + unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; + /* Protected by alloc_lock: */ + struct mempolicy *mempolicy; + short il_next; + short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + /* Migration stamp: */ + u64 node_stamp; + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; /* * numa_faults is an array split into four regions: @@ -840,8 +912,8 @@ struct task_struct { * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ - unsigned long *numa_faults; - unsigned long total_numa_faults; + unsigned long *numa_faults; + unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last @@ -849,119 +921,132 @@ struct task_struct { * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[3]; + unsigned long numa_faults_locality[3]; - unsigned long numa_pages_migrated; + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ - struct tlbflush_unmap_batch tlb_ubc; + struct tlbflush_unmap_batch tlb_ubc; - struct rcu_head rcu; + struct rcu_head rcu; - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; + /* Cache last used pipe for splice(): */ + struct pipe_inode_info *splice_pipe; - struct page_frag task_frag; + struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION - int make_it_fail; + int make_it_fail; #endif /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause + * When (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for a dirty throttling pause: */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ + int nr_dirtied; + int nr_dirtied_pause; + /* Start of a write-and-pause period: */ + unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif /* - * time slack values; these are used to round up poll() and + * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ - u64 timer_slack_ns; - u64 default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN - unsigned int kasan_depth; + unsigned int kasan_depth; #endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; + + /* Timestamp for last schedule: */ + unsigned long long ftrace_timestamp; + /* * Number of functions that haven't been traced - * because of depth overrun. + * because of depth overrun: */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; + atomic_t trace_overrun; + + /* Pause tracing: */ + atomic_t tracing_graph_pause; #endif + #ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; + /* State flags for use by tracers: */ + unsigned long trace; + + /* Bitmask and counter of trace recursion: */ + unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + #ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; + /* Coverage collection mode enabled for this task (0 if disabled): */ + enum kcov_mode kcov_mode; + + /* Size of the kcov_area: */ + unsigned int kcov_size; + + /* Buffer for coverage collection: */ + void *kcov_area; + + /* KCOV descriptor wired with this task or NULL: */ + struct kcov *kcov; #endif + #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif + #ifdef CONFIG_UPROBES - struct uprobe_task *utask; + struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; + unsigned long task_state_change; #endif - int pagefault_disabled; + int pagefault_disabled; #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; + struct task_struct *oom_reaper_list; #endif #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; + struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; + /* A live task holds one reference: */ + atomic_t stack_refcount; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! - */ + /* CPU-specific state of this task: */ + struct thread_struct thread; + + /* + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. + * + * Do not put anything below here! + */ }; static inline struct pid *task_pid(struct task_struct *task) @@ -975,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task) } /* - * Without tasklist or rcu lock it is not safe to dereference + * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ @@ -1002,16 +1087,14 @@ static inline struct pid *task_session(struct task_struct *task) * * see also pid_nr() etc in include/linux/pid.h */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } @@ -1027,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return pid_vnr(task_tgid(tsk)); } +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->pids[PIDTYPE_PID].pid != NULL; +} -static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; @@ -1053,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk) return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } @@ -1065,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } @@ -1076,27 +1170,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -/* obsolete, do not use */ +/* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. - * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. @@ -1116,34 +1195,34 @@ extern struct pid *cad_pid; /* * Per process flags */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* Dumped core */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1156,33 +1235,38 @@ extern struct pid *cad_pid; * child is not running and in turn not changing child->flags * at the same time the parent does it. */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) + #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) + +#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) + #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) + /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) /* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } @@ -1201,30 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) +static inline void +tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { task->flags &= ~flags; task->flags |= orig_flags & flags; } -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); #ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); +extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) +static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; @@ -1239,6 +1316,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); + /** * task_nice - return the nice value of a given task. * @p: the task in question. @@ -1249,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } + extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); +extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); +extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); + /** * is_idle_task - is the specified task an idle task? * @p: the task in question. @@ -1269,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } + extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -1302,23 +1380,25 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) */ extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); + #ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); +extern void kick_process(struct task_struct *tsk); #else - static inline void kick_process(struct task_struct *tsk) { } +static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); + static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } + extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP @@ -1326,15 +1406,15 @@ void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { return 1; } #endif -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available +/* + * Set thread flags in other task's structures. + * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { -- cgit v1.2.3 From a528d35e8bfcc521d7cb70aaf03e1bd296c8493f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 16:46:22 +0000 Subject: statx: Add a system call to make enhanced file info available Add a system call to make extended file information available, including file creation and some attribute flags where available through the underlying filesystem. The getattr inode operation is altered to take two additional arguments: a u32 request_mask and an unsigned int flags that indicate the synchronisation mode. This change is propagated to the vfs_getattr*() function. Functions like vfs_stat() are now inline wrappers around new functions vfs_statx() and vfs_statx_fd() to reduce stack usage. ======== OVERVIEW ======== The idea was initially proposed as a set of xattrs that could be retrieved with getxattr(), but the general preference proved to be for a new syscall with an extended stat structure. A number of requests were gathered for features to be included. The following have been included: (1) Make the fields a consistent size on all arches and make them large. (2) Spare space, request flags and information flags are provided for future expansion. (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an __s64). (4) Creation time: The SMB protocol carries the creation time, which could be exported by Samba, which will in turn help CIFS make use of FS-Cache as that can be used for coherency data (stx_btime). This is also specified in NFSv4 as a recommended attribute and could be exported by NFSD [Steve French]. (5) Lightweight stat: Ask for just those details of interest, and allow a netfs (such as NFS) to approximate anything not of interest, possibly without going to the server [Trond Myklebust, Ulrich Drepper, Andreas Dilger] (AT_STATX_DONT_SYNC). (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks its cached attributes are up to date [Trond Myklebust] (AT_STATX_FORCE_SYNC). And the following have been left out for future extension: (7) Data version number: Could be used by userspace NFS servers [Aneesh Kumar]. Can also be used to modify fill_post_wcc() in NFSD which retrieves i_version directly, but has just called vfs_getattr(). It could get it from the kstat struct if it used vfs_xgetattr() instead. (There's disagreement on the exact semantics of a single field, since not all filesystems do this the same way). (8) BSD stat compatibility: Including more fields from the BSD stat such as creation time (st_btime) and inode generation number (st_gen) [Jeremy Allison, Bernd Schubert]. (9) Inode generation number: Useful for FUSE and userspace NFS servers [Bernd Schubert]. (This was asked for but later deemed unnecessary with the open-by-handle capability available and caused disagreement as to whether it's a security hole or not). (10) Extra coherency data may be useful in making backups [Andreas Dilger]. (No particular data were offered, but things like last backup timestamp, the data version number and the DOS archive bit would come into this category). (11) Allow the filesystem to indicate what it can/cannot provide: A filesystem can now say it doesn't support a standard stat feature if that isn't available, so if, for instance, inode numbers or UIDs don't exist or are fabricated locally... (This requires a separate system call - I have an fsinfo() call idea for this). (12) Store a 16-byte volume ID in the superblock that can be returned in struct xstat [Steve French]. (Deferred to fsinfo). (13) Include granularity fields in the time data to indicate the granularity of each of the times (NFSv4 time_delta) [Steve French]. (Deferred to fsinfo). (14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags. Note that the Linux IOC flags are a mess and filesystems such as Ext4 define flags that aren't in linux/fs.h, so translation in the kernel may be a necessity (or, possibly, we provide the filesystem type too). (Some attributes are made available in stx_attributes, but the general feeling was that the IOC flags were to ext[234]-specific and shouldn't be exposed through statx this way). (15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer, Michael Kerrisk]. (Deferred, probably to fsinfo. Finding out if there's an ACL or seclabal might require extra filesystem operations). (16) Femtosecond-resolution timestamps [Dave Chinner]. (A __reserved field has been left in the statx_timestamp struct for this - if there proves to be a need). (17) A set multiple attributes syscall to go with this. =============== NEW SYSTEM CALL =============== The new system call is: int ret = statx(int dfd, const char *filename, unsigned int flags, unsigned int mask, struct statx *buffer); The dfd, filename and flags parameters indicate the file to query, in a similar way to fstatat(). There is no equivalent of lstat() as that can be emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is also no equivalent of fstat() as that can be emulated by passing a NULL filename to statx() with the fd of interest in dfd. Whether or not statx() synchronises the attributes with the backing store can be controlled by OR'ing a value into the flags argument (this typically only affects network filesystems): (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this respect. (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise its attributes with the server - which might require data writeback to occur to get the timestamps correct. (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a network filesystem. The resulting values should be considered approximate. mask is a bitmask indicating the fields in struct statx that are of interest to the caller. The user should set this to STATX_BASIC_STATS to get the basic set returned by stat(). It should be noted that asking for more information may entail extra I/O operations. buffer points to the destination for the data. This must be 256 bytes in size. ====================== MAIN ATTRIBUTES RECORD ====================== The following structures are defined in which to return the main attribute set: struct statx_timestamp { __s64 tv_sec; __s32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 __spare1[1]; struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 __spare2[14]; }; The defined bits in request_mask and stx_mask are: STATX_TYPE Want/got stx_mode & S_IFMT STATX_MODE Want/got stx_mode & ~S_IFMT STATX_NLINK Want/got stx_nlink STATX_UID Want/got stx_uid STATX_GID Want/got stx_gid STATX_ATIME Want/got stx_atime{,_ns} STATX_MTIME Want/got stx_mtime{,_ns} STATX_CTIME Want/got stx_ctime{,_ns} STATX_INO Want/got stx_ino STATX_SIZE Want/got stx_size STATX_BLOCKS Want/got stx_blocks STATX_BASIC_STATS [The stuff in the normal stat struct] STATX_BTIME Want/got stx_btime{,_ns} STATX_ALL [All currently available stuff] stx_btime is the file creation time, stx_mask is a bitmask indicating the data provided and __spares*[] are where as-yet undefined fields can be placed. Time fields are structures with separate seconds and nanoseconds fields plus a reserved field in case we want to add even finer resolution. Note that times will be negative if before 1970; in such a case, the nanosecond fields will also be negative if not zero. The bits defined in the stx_attributes field convey information about a file, how it is accessed, where it is and what it does. The following attributes map to FS_*_FL flags and are the same numerical value: STATX_ATTR_COMPRESSED File is compressed by the fs STATX_ATTR_IMMUTABLE File is marked immutable STATX_ATTR_APPEND File is append-only STATX_ATTR_NODUMP File is not to be dumped STATX_ATTR_ENCRYPTED File requires key to decrypt in fs Within the kernel, the supported flags are listed by: KSTAT_ATTR_FS_IOC_FLAGS [Are any other IOC flags of sufficient general interest to be exposed through this interface?] New flags include: STATX_ATTR_AUTOMOUNT Object is an automount trigger These are for the use of GUI tools that might want to mark files specially, depending on what they are. Fields in struct statx come in a number of classes: (0) stx_dev_*, stx_blksize. These are local system information and are always available. (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino, stx_size, stx_blocks. These will be returned whether the caller asks for them or not. The corresponding bits in stx_mask will be set to indicate whether they actually have valid values. If the caller didn't ask for them, then they may be approximated. For example, NFS won't waste any time updating them from the server, unless as a byproduct of updating something requested. If the values don't actually exist for the underlying object (such as UID or GID on a DOS file), then the bit won't be set in the stx_mask, even if the caller asked for the value. In such a case, the returned value will be a fabrication. Note that there are instances where the type might not be valid, for instance Windows reparse points. (2) stx_rdev_*. This will be set only if stx_mode indicates we're looking at a blockdev or a chardev, otherwise will be 0. (3) stx_btime. Similar to (1), except this will be set to 0 if it doesn't exist. ======= TESTING ======= The following test program can be used to test the statx system call: samples/statx/test-statx.c Just compile and run, passing it paths to the files you want to examine. The file is built automatically if CONFIG_SAMPLES is enabled. Here's some example output. Firstly, an NFS directory that crosses to another FSID. Note that the AUTOMOUNT attribute is set because transiting this directory will cause d_automount to be invoked by the VFS. [root@andromeda ~]# /tmp/test-statx -A /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:26 Inode: 1703937 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------) Secondly, the result of automounting on that directory. [root@andromeda ~]# /tmp/test-statx /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:27 Inode: 2 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 35 +++++++++++++++++++++++++++-------- include/linux/nfs_fs.h | 2 +- include/linux/stat.h | 24 ++++++++++++++++++------ include/linux/syscalls.h | 3 +++ 4 files changed, 49 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 52350947c670..aad3fd0ff5f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -2902,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2916,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2949,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f1da8c8dd473..287f34161086 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2a..c76e524fb34b 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include #include +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b884..980c3c9b06f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif -- cgit v1.2.3 From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Feb 2017 11:32:22 -0500 Subject: jump_label: Fix anonymous union initialization Pre-4.6 gcc do not allow direct static initialization of members of anonymous structs/unions. After commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE} definitions cannot be compiled with those older compilers. Placing initializers inside curved brackets works around this problem. Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key") Reviewed-by: Jason Baron Compiled-by: Chris Mason Signed-off-by: Boris Ostrovsky Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 680c98b2f41c..a7f90117cf7d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ -- cgit v1.2.3 From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 17:28:45 -0500 Subject: jump_label: Add comment about initialization order for anonymous unions Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") broke old compilers that could not handle static initialization of anonymous unions. Boris fixed it with a patch that added brackets around the static initializer. But this creates a dependency between those initializers and the structure's order of its fields. Document this dependency in case new fields are added to struct static_key in the future. Noted-by: Boris Ostrovsky Suggested-by: Chris Mason Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a7f90117cf7d..28e04a33535a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod -- cgit v1.2.3 From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: net: phy: Do not perform software reset for Generic PHY The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 772476028a65..43a774873aa9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); -- cgit v1.2.3 From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Feb 2017 11:32:47 +0000 Subject: KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to zero, which implies that there is a way to bypass the GIC and inject raw IRQ/FIQ by driving the CPU pins. Of course, we don't allow that when the GIC is configured, but we fail to indicate that to the guest. The obvious fix is to set these bits (and never let them being changed again). Reported-by: Peter Maydell Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc8..97cbca19430d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* -- cgit v1.2.3 From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 2 Mar 2017 16:11:47 +0100 Subject: irqdomain: Add empty irq_domain_check_msi_remap Fix following build error for s390: drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group': drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap' Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ -- cgit v1.2.3 From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Mar 2017 15:03:22 -0600 Subject: ucount: Remove the atomicity from ucount->count Always increment/decrement ucount->count under the ucounts_lock. The increments are there already and moving the decrements there means the locking logic of the code is simpler. This simplification in the locking logic fixes a race between put_ucounts and get_ucounts that could result in a use-after-free because the count could go zero then be found by get_ucounts and then be freed by put_ucounts. A bug presumably this one was found by a combination of syzkaller and KASAN. JongWhan Kim reported the syzkaller failure and Dmitry Vyukov spotted the race in the code. Cc: stable@vger.kernel.org Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user") Reported-by: JongHwan Kim Reported-by: Dmitry Vyukov Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index be765234c0a2..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; -- cgit v1.2.3 From 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 12 Feb 2017 17:11:07 +0100 Subject: libceph: osd_request_timeout option osd_request_timeout specifies how many seconds to wait for a response from OSDs before returning -ETIMEDOUT from an OSD request. 0 (default) means no limit. osd_request_timeout is osdkeepalive-precise -- in-flight requests are swept through every osdkeepalive seconds. With ack vs commit behaviour gone, abort_request() is really simple. This is based on a patch from Artur Molchanov . Tested-by: Artur Molchanov Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/libceph.h | 2 ++ include/linux/ceph/osd_client.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e26581..88cd5dc8e238 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -48,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long osd_request_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,6 +69,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc..c125b5d9e13c 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -189,6 +189,7 @@ struct ceph_osd_request { /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ + unsigned long r_start_stamp; /* jiffies */ int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; -- cgit v1.2.3 From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: dccp: fix use-after-free in dccp_feat_activate_values Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/linux/dccp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf60..68449293c4b6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; -- cgit v1.2.3 From c01228db4ba965986511a5b28c478bddd7e2726e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:34 +0100 Subject: Revert "scsi, block: fix duplicate bdi name registration crashes" This reverts commit 0dba1314d4f81115dce711292ec7981d17231064. It causes leaking of device numbers for SCSI when SCSI registers multiple gendisks for one request_queue in succession. It can be easily reproduced using Omar's script [1] on kernel with CONFIG_DEBUG_TEST_DRIVER_REMOVE. Furthermore the protection provided by this commit is not needed anymore as the problem it was fixing got also fixed by commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()". [1]: http://marc.info/?l=linux-block&m=148554717109098&w=2 Signed-off-by: Jan Kara Acked-by: Dan Williams Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - include/linux/genhd.h | 8 -------- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 796016e63c1d..5a7da607ca04 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -435,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1..76f39754e7b0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); -- cgit v1.2.3 From bd0f9b356d00aa241ced36fb075a07041c28d3b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2017 15:33:14 -0800 Subject: sched/headers: fix up header file dependency on The scheduler header file split and cleanups ended up exposing a few nasty header file dependencies, and in particular it showed how we in ended up depending on "signal_pending()", which now comes from . That's a very subtle and annoying dependency, which already caused a semantic merge conflict (see commit e58bc927835a "Pull overlayfs updates from Miklos Szeredi", which added that fixup in the merge commit). It turns out that we can avoid this dependency _and_ improve code generation by moving the guts of the fairly nasty helper #define __wait_event_interruptible_locked() to out-of-line code. The code that includes the signal_pending() check is all in the slow-path where we actually go to sleep waiting for the event anyway, so using a helper function is the right thing to do. Using a helper function is also what we already did for the non-locked versions, see the "__wait_event*()" macros and the "prepare_to_wait*()" set of helper functions. We might want to try to unify all these macro games, we have a _lot_ of subtly different wait-event loops. But this is the minimal patch to fix the annoying header dependency. Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/wait.h | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index aacb1282d19a..db076ca7f11d 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -620,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -676,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -703,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -734,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -765,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ -- cgit v1.2.3 From 505a60e225606fbd3d2eadc31ff793d939ba66f1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:03 +0300 Subject: asm-generic: introduce 5level-fixup.h We are going to switch core MM to 5-level paging abstraction. This is preparation step which adds As with 4level-fixup.h, the new header allows quickly make all architectures compatible with 5-level paging in core MM. In long run we would like to switch architectures to properly folded p4d level by using , but it requires more changes to arch-specific code. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f4..be1fe264eb37 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1619,11 +1619,14 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) + +#ifndef __ARCH_HAS_5LEVEL_HACK static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? NULL: pud_offset(pgd, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { -- cgit v1.2.3 From c2febafc67734a62196c1b9dfba926412d4077ba Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:07 +0300 Subject: mm: convert generic code to 5-level paging Convert all non-architecture-specific code to 5-level paging. It's mostly mechanical adding handling one more page table level in places where we deal with pud_t. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 ++++- include/linux/kasan.h | 1 + include/linux/mm.h | 31 +++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada..b857fc8cc2ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ceb3fe78a0d3..1c823bef4c15 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/include/linux/mm.h b/include/linux/mm.h index be1fe264eb37..5f01c88f0800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) #ifndef __ARCH_HAS_5LEVEL_HACK -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } #endif /* !__ARCH_HAS_5LEVEL_HACK */ @@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); -- cgit v1.2.3 From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:13 -0800 Subject: bpf: convert htab map to hlist_nulls when all map elements are pre-allocated one cpu can delete and reuse htab_elem while another cpu is still walking the hlist. In such case the lookup may miss the element. Convert hlist to hlist_nulls to avoid such scenario. When bucket lock is taken there is no need to take such precautions, so only convert map_lookup and map_get_next to nulls. The race window is extremely small and only reproducible with explicit udelay() inside lookup_nulls_elem_raw() Similar to hlist add hlist_nulls_for_each_entry_safe() and hlist_nulls_entry_safe() helpers. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 5 +++++ include/linux/rculist_nulls.h | 14 ++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index b01fe1009084..87ff4f58a2f0 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -29,6 +29,11 @@ struct hlist_nulls_node { ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) /** * ptr_is_a_nulls - Test if a ptr is a nulls * @ptr: ptr to be tested diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597..a23a33153180 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) +/** + * hlist_nulls_for_each_entry_safe - + * iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + */ +#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif -- cgit v1.2.3 From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:31 -0800 Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: disble||disable disbled||disabled I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c untouched. The macro is not referenced at all, but this commit is touching only comment blocks just in case. Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/regulator/machine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** -- cgit v1.2.3 From dd0db88d8094a6d9d4d1fc5fcd56ab619f54ccf8 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:49 -0800 Subject: userfaultfd: non-cooperative: rollback userfaultfd_exit Patch series "userfaultfd non-cooperative further update for 4.11 merge window". Unfortunately I noticed one relevant bug in userfaultfd_exit while doing more testing. I've been doing testing before and this was also tested by kbuild bot and exercised by the selftest, but this bug never reproduced before. I dropped userfaultfd_exit as result. I dropped it because of implementation difficulty in receiving signals in __mmput and because I think -ENOSPC as result from the background UFFDIO_COPY should be enough already. Before I decided to remove userfaultfd_exit, I noticed userfaultfd_exit wasn't exercised by the selftest and when I tried to exercise it, after moving it to a more correct place in __mmput where it would make more sense and where the vma list is stable, it resulted in the event_wait_completion in D state. So then I added the second patch to be sure even if we call userfaultfd_event_wait_completion too late during task exit(), we won't risk to generate tasks in D state. The same check exists in handle_userfault() for the same reason, except it makes a difference there, while here is just a robustness check and it's run under WARN_ON_ONCE. While looking at the userfaultfd_event_wait_completion() function I looked back at its callers too while at it and I think it's not ok to stop executing dup_fctx on the fcs list because we relay on userfaultfd_event_wait_completion to execute userfaultfd_ctx_put(fctx->orig) which is paired against userfaultfd_ctx_get(fctx->orig) in dup_userfault just before list_add(fcs). This change only takes care of fctx->orig but this area also needs further review looking for similar problems in fctx->new. The only patch that is urgent is the first because it's an use after free during a SMP race condition that affects all processes if CONFIG_USERFAULTFD=y. Very hard to reproduce though and probably impossible without SLUB poisoning enabled. This patch (of 3): I once reproduced this oops with the userfaultfd selftest, it's not easily reproducible and it requires SLUB poisoning to reproduce. general protection fault: 0000 [#1] SMP Modules linked in: CPU: 2 PID: 18421 Comm: userfaultfd Tainted: G ------------ T 3.10.0+ #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 task: ffff8801f83b9440 ti: ffff8801f833c000 task.ti: ffff8801f833c000 RIP: 0010:[] [] userfaultfd_exit+0x29/0xa0 RSP: 0018:ffff8801f833fe80 EFLAGS: 00010202 RAX: ffff8801f833ffd8 RBX: 6b6b6b6b6b6b6b6b RCX: ffff8801f83b9440 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800baf18600 RBP: ffff8801f833fee8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: ffffffff8127ceb3 R12: 0000000000000000 R13: ffff8800baf186b0 R14: ffff8801f83b99f8 R15: 00007faed746c700 FS: 0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007faf0966f028 CR3: 0000000001bc6000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: do_exit+0x297/0xd10 SyS_exit+0x17/0x20 tracesys+0xdd/0xe2 Code: 00 00 66 66 66 66 90 55 48 89 e5 41 54 53 48 83 ec 58 48 8b 1f 48 85 db 75 11 eb 73 66 0f 1f 44 00 00 48 8b 5b 10 48 85 db 74 64 <4c> 8b a3 b8 00 00 00 4d 85 e4 74 eb 41 f6 84 24 2c 01 00 00 80 RIP [] userfaultfd_exit+0x29/0xa0 RSP ---[ end trace 9fecd6dcb442846a ]--- In the debugger I located the "mm" pointer in the stack and walking mm->mmap->vm_next through the end shows the vma->vm_next list is fully consistent and it is null terminated list as expected. So this has to be an SMP race condition where userfaultfd_exit was running while the vma list was being modified by another CPU. When userfaultfd_exit() run one of the ->vm_next pointers pointed to SLAB_POISON (RBX is the vma pointer and is 0x6b6b..). The reason is that it's not running in __mmput but while there are still other threads running and it's not holding the mmap_sem (it can't as it has to wait the even to be received by the manager). So this is an use after free that was happening for all processes. One more implementation problem aside from the race condition: userfaultfd_exit has really to check a flag in mm->flags before walking the vma or it's going to slowdown the exit() path for regular tasks. One more implementation problem: at that point signals can't be delivered so it would also create a task in D state if the manager doesn't read the event. The major design issue: it overall looks superfluous as the manager can check for -ENOSPC in the background transfer: if (mmget_not_zero(ctx->mm)) { [..] } else { return -ENOSPC; } It's safer to roll it back and re-introduce it later if at all. [rppt@linux.vnet.ibm.com: documentation fixup after removal of UFFD_EVENT_EXIT] Link: http://lkml.kernel.org/r/1488345437-4364-1-git-send-email-rppt@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20170224181957.19736-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Signed-off-by: Mike Rapoport Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebf..f2b79bf4c895 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -72,8 +72,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -139,10 +137,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ -- cgit v1.2.3 From cbfd0c1001bedb4b051cf4a1f5df24f1500381bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Mar 2017 16:16:57 -0800 Subject: include/linux/fs.h: fix unsigned enum warning with gcc-4.2 With arm-linux-gcc-4.2, almost every file we build in the kernel ends up with this warning: include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is always false Later versions don't have this problem, but it's easy enough to work around. Link: http://lkml.kernel.org/r/20161216105634.235457-12-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Russell King Cc: Brendan Gregg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index aad3fd0ff5f8..7251f7bb45e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; -- cgit v1.2.3 From ce9311cf95ad8baf044a014738d76973d93b739a Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Thu, 9 Mar 2017 16:17:00 -0800 Subject: mm/vmstats: add thp_split_pud event for clarity We added support for PUD-sized transparent hugepages, however we count the event "thp split pud" into thp_split_pmd event. To separate the event count of thp split pud from pmd, add a new event named thp_split_pud. Link: http://lkml.kernel.org/r/1488282380-5076-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Cc: Joonsoo Kim Cc: Sebastian Siewior Cc: Hugh Dickins Cc: Christoph Lameter Cc: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: David Rientjes Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb5828..a80b7b59cf33 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif -- cgit v1.2.3 From 70ccb92fdd90b35bb6f9200093d4ffd6cb38156b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:11 -0800 Subject: userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED userfaultfd_remove() has to be execute before zapping the pagetables or UFFDIO_COPY could keep filling pages after zap_page_range returned, which would result in non zero data after a MADV_DONTNEED. However userfaultfd_remove() may have to release the mmap_sem. This was handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a potentially stale vma (the very vma passed to zap_page_range(vma, ...)). The fix consists in revalidating the vma in case userfaultfd_remove() had to release the mmap_sem. This also optimizes away an unnecessary down_read/up_read in the MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered. It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as userfaultfd_remove() will be defined as "true" at build time. Link: http://lkml.kernel.org/r/20170302173738.18994-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/userfaultfd_k.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f2b79bf4c895..48a3483dccb1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, -- cgit v1.2.3 From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:09:05 +0000 Subject: net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index cd0c8bd0a1de..0620f5e18c96 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags); + struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer); -- cgit v1.2.3 From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Mar 2017 13:17:18 +0100 Subject: kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside of that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and cleanup the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewn reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith < Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- include/linux/purgatory.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/linux/purgatory.h (limited to 'include/linux') diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 000000000000..d60d4e278609 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include +#include +#include + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif -- cgit v1.2.3 From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- include/linux/acpi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..63a7519b00cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- cgit v1.2.3 From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:27 +0800 Subject: acpi/processor: Check for duplicate processor ids at hotplug time The check for duplicate processor ids happens at boot time based on the ACPI table contents, but the final sanity checks for a processor happen at hotplug time. At hotplug time, where the physical information is available, which might differ from the ACPI table information, a check for duplicate processor ids is missing. Add it to the hotplug checks and rename the function so it better reflects its purpose. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 63a7519b00cc..9b05886f9773 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) } /* Validate the processor object's proc_id */ -bool acpi_processor_validate_proc_id(int proc_id); +bool acpi_duplicate_processor_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -- cgit v1.2.3 From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 16:55:49 +0100 Subject: bpf: improve read-only handling Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a one-time warning in case of an error when the image couldn't be set read-only, and also mark struct bpf_prog as locked when bpf_prog_lock_ro() was called. Reason for the latter is that bpf_prog_unlock_ro() is called from various places including error paths, and we shouldn't mess with page attributes when really not needed. For bpf_jit_binary_unlock_ro() this is not needed as jited flag implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY we're guaranteed to have a previously locked image. Overall, this should also help us to identify any further potential issues with set_memory_*() helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c167fdee5f7..fbf7b39e8103 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -409,6 +409,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ + locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_memory_ro((unsigned long)fp, fp->pages); + fp->locked = 1; + WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { - set_memory_rw((unsigned long)fp, fp->pages); + if (fp->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + fp->locked = 0; + } } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - set_memory_ro((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { - set_memory_rw((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); } #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) -- cgit v1.2.3 From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 9 Mar 2017 17:20:04 +0100 Subject: iio: sw-device: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software device creation will result in undefined behaviour when configfs is built as a module since the configfs group for the device not properly initialized. Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group initialization"). Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") Reported-by: Miguel Robles Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index 23ca41515527..fa7931933067 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&d->group, name, type); #endif } -- cgit v1.2.3 From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 13 Mar 2017 20:50:08 +0100 Subject: usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk Some USB 2.0 devices erroneously report millisecond values in bInterval. The generic config code manages to catch most of them, but in some cases it's not completely enough. The case at stake here is a USB 2.0 braille device, which wants to announce 10ms and thus sets bInterval to 10, but with the USB 2.0 computation that yields to 64ms. It happens that one can type fast enough to reach this interval and get the device buffers overflown, leading to problematic latencies. The generic config code does not catch this case because the 64ms is considered a sane enough value. This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk to mark devices which actually report milliseconds in bInterval, and marks Vario Ultra devices as needing it. Signed-off-by: Samuel Thibault Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e4..de2a722fe3cf 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 8 Feb 2017 09:24:25 +0000 Subject: serial: st-asc: Use new GPIOD API to obtain RTS pin The commits mentioned below adapt the GPIO API to allow more information to be passed directly through devm_get_gpiod_from_child() in the first instance. This facilitates the removal of subsequent calls, such as gpiod_direction_output(). This patch firstly moves to utilise the new API and secondly removes the now superfluous call do set the direction. Reported-by: Stephen Rothwell Suggested-by: Boris Brezillon Signed-off-by: Lee Jones [Also drop the header file dummies that only this driver was using] Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..933d93656605 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); -/* FIXME: delete this helper when users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, - 0, child, - GPIOD_ASIS, - "?"); -} #else /* CONFIG_GPIOLIB */ @@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } -/* FIXME: delete this when all users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB */ static inline -- cgit v1.2.3 From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Feb 2017 13:25:14 -0800 Subject: fscrypt: eliminate ->prepare_context() operation The only use of the ->prepare_context() fscrypt operation was to allow ext4 to evict inline data from the inode before ->set_context(). However, there is no reason why this cannot be done as simply the first step in ->set_context(), and in fact it makes more sense to do it that way because then the policy modes and flags get validated before any real work is done. Therefore, merge ext4_prepare_context() into ext4_set_context(), and remove ->prepare_context(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 547f81592ba1..10c1abfbac6c 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -87,7 +87,6 @@ struct fscrypt_operations { unsigned int flags; const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); - int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); bool (*is_encrypted)(struct inode *); -- cgit v1.2.3 From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:13:57 -0700 Subject: vmbus: use rcu for per-cpu channel list The per-cpu channel list is now referred to in the interrupt routine. This is mostly safe since the host will not normally generate an interrupt when channel is being deleted but if it did then there would be a use after free problem. To solve, this use RCU protection on ther per-cpu list. Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a..c4c7ae91f9d1 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -845,6 +845,13 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + + /* + * Defer freeing channel until after all cpu's have + * gone through grace period. + */ + struct rcu_head rcu; + /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput -- cgit v1.2.3 From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:13:58 -0700 Subject: vmbus: remove hv_event_tasklet_disable/enable With the recent introduction of per-channel tasklet, we need to update the way we handle the 3 concurrency issues: 1. hv_process_channel_removal -> percpu_channel_deq vs. vmbus_chan_sched -> list_for_each_entry(..., percpu_list); 2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched. 3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event; The first 2 issues can be handled by Stephen's recent patch "vmbus: use rcu for per-cpu channel list", and the third issue can be handled by calling tasklet_disable in vmbus_close_internal here. We don't need the original hv_event_tasklet_disable/enable since we now use per-channel tasklet instead of the previous per-CPU tasklet, and actually we must remove them due to the side effect now: vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will start the per-channel callback prematurely, cauing NULL dereferencing (the channel may haven't been properly configured to run the callback yet). Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Dexuan Cui Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Tested-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c4c7ae91f9d1..970771a5f739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_event_tasklet_disable(struct vmbus_channel *channel); -void hv_event_tasklet_enable(struct vmbus_channel *channel); - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); void vmbus_setevent(struct vmbus_channel *channel); -- cgit v1.2.3 From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 10 Mar 2017 12:28:18 -0600 Subject: crypto: ccp - Assign DMA commands to the channel's CCP The CCP driver generally uses a round-robin approach when assigning operations to available CCPs. For the DMA engine, however, the DMA mappings of the SGs are associated with a specific CCP. When an IOMMU is enabled, the IOMMU is programmed based on this specific device. If the DMA operations are not performed by that specific CCP then addressing errors and I/O page faults will occur. Update the CCP driver to allow a specific CCP device to be requested for an operation and use this in the DMA engine support. Cc: # 4.9.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c71dd8fa5764..c41b8d99dd0e 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -556,7 +556,7 @@ enum ccp_engine { * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) - * @ccp: CCP device to be run on (ccp driver use only) + * @ccp: CCP device to be run on * @ret: operation return code (ccp driver use only) * @flags: cmd processing flags * @engine: CCP operation to perform -- cgit v1.2.3 From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 Mar 2017 16:40:21 -0700 Subject: kasan: add a prototype of task_struct to avoid warning Add a prototype of task_struct to fix below warning on arm64. In file included from arch/arm64/kernel/probes/kprobes.c:19:0: include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static inline void kasan_unpoison_task_stack(struct task_struct *task) {} As same as other types (kmem_cache, page, and vm_struct) this adds a prototype of task_struct data structure on top of kasan.h. [arnd] A related warning was fixed before, but now appears in a different line in the same file in v4.11-rc2. The patch from Masami Hiramatsu still seems appropriate, so let's take his version. Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove from ") Link: https://patchwork.kernel.org/patch/9569839/ Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu Acked-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c15..5734480c9590 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN -- cgit v1.2.3 From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:33 -0700 Subject: drivers core: remove assert_held_device_hotplug() The last caller of assert_held_device_hotplug() is gone, so remove it again. Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Acked-by: Dan Williams Cc: Michal Hocko Cc: "Rafael J. Wysocki" Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928d..9ef518af5515 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: net/mlx4_core: Avoid delays during VF driver device shutdown Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7e66e4f62858..1beb1ec2fbdf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 20 Mar 2017 11:25:16 +0100 Subject: reset: fix optional reset_control_get stubs to return NULL When RESET_CONTROLLER is not enabled, the optional reset_control_get stubs should now also return NULL. Since it is now valid for reset_control_assert/deassert/reset/status/put to be called unconditionally, with NULL as an argument for optional resets, the stubs are not allowed to warn anymore. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andrzej Hajda Tested-by: Andrzej Hajda Reviewed-by: Andrzej Hajda Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 86b4ed75359e..96fb139bdd08 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev) static inline int reset_control_reset(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_assert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_deassert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_status(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline void reset_control_put(struct reset_control *rstc) { - WARN_ON(1); } static inline int __must_check device_reset(struct device *dev) @@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get( const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } #endif /* CONFIG_RESET_CONTROLLER */ -- cgit v1.2.3 From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:14 +0800 Subject: vsock: track pkt owner vsock So that we can cancel a queued pkt later if necessary. Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f..584f9a647ad4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; -- cgit v1.2.3 From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:03:00 -0400 Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled while packets are collected on the error queue. So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags is not enough to safely assume that the skb contains OPT_STATS data. Add a bit in sock_exterr_skb to indicate whether the skb contains opt_stats data. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/errqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 9ca23fcfb5d7..6fdfc884fdeb 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -20,6 +20,8 @@ struct sock_exterr_skb { struct sock_extended_err ee; u16 addr_offset; __be16 port; + u8 opt_stats:1, + unused:7; }; #endif -- cgit v1.2.3 From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Fri, 17 Mar 2017 00:28:56 +0100 Subject: hwmon: Add missing HWMON_T_ALARM Unfortunately the HWMON_T_ALARM define was missing, although the associated entry was present in hwmon_temp_attributes. This is needed to convert drivers to the new interface which use channel based alarms. Signed-off-by: Peter Huewe Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 78d59dba563e..88b673749121 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -88,6 +88,7 @@ enum hwmon_temp_attributes { #define HWMON_T_CRIT_HYST BIT(hwmon_temp_crit_hyst) #define HWMON_T_EMERGENCY BIT(hwmon_temp_emergency) #define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst) +#define HWMON_T_ALARM BIT(hwmon_temp_alarm) #define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) #define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) #define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) -- cgit v1.2.3 From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:16 +0000 Subject: iommu: Disambiguate MSI region types The introduction of reserved regions has left a couple of rough edges which we could do with sorting out sooner rather than later. Since we are not yet addressing the potential dynamic aspect of software-managed reservations and presenting them at arbitrary fixed addresses, it is incongruous that we end up displaying hardware vs. software-managed MSI regions to userspace differently, especially since ARM-based systems may actually require one or the other, or even potentially both at once, (which iommu-dma currently has no hope of dealing with at all). Let's resolve the former user-visible inconsistency ASAP before the ABI has been baked into a kernel release, in a way that also lays the groundwork for the latter shortcoming to be addressed by follow-up patches. For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use IOMMU_RESV_MSI to describe the hardware type, and document everything a little bit. Since the x86 MSI remapping hardware falls squarely under this meaning of IOMMU_RESV_MSI, apply that type to their regions as well, so that we tell the same story to userspace across all platforms. Secondly, as the various region types require quite different handling, and it really makes little sense to ever try combining them, convert the bitfield-esque #defines to a plain enum in the process before anyone gets the wrong impression. Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region") Reviewed-by: Eric Auger CC: Alex Williamson CC: David Woodhouse CC: kvm@vger.kernel.org Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0..2e4de0deee53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,9 +125,16 @@ enum iommu_attr { }; /* These are the possible reserved region types */ -#define IOMMU_RESV_DIRECT (1 << 0) -#define IOMMU_RESV_RESERVED (1 << 1) -#define IOMMU_RESV_MSI (1 << 2) +enum iommu_resv_type { + /* Memory regions which must be mapped 1:1 at all times */ + IOMMU_RESV_DIRECT, + /* Arbitrary "never map this or give it to a device" address ranges */ + IOMMU_RESV_RESERVED, + /* Hardware MSI region (untranslated) */ + IOMMU_RESV_MSI, + /* Software-managed MSI translation window */ + IOMMU_RESV_SW_MSI, +}; /** * struct iommu_resv_region - descriptor for a reserved memory region @@ -142,7 +149,7 @@ struct iommu_resv_region { phys_addr_t start; size_t length; int prot; - int type; + enum iommu_resv_type type; }; #ifdef CONFIG_IOMMU_API @@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * -iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, + enum iommu_resv_type type); extern int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head); -- cgit v1.2.3 From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c6791207..34fe92ce1ebd 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); -- cgit v1.2.3 From 90db10434b163e46da413d34db8d0e77404cc645 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 18:24:19 +0100 Subject: KVM: kvm_io_bus_unregister_dev() should never fail No caller currently checks the return value of kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on freeing their device. A stale reference will remain in the io_bus, getting at least used again, when the iobus gets teared down on kvm_destroy_vm() - leading to use after free errors. There is nothing the callers could do, except retrying over and over again. So let's simply remove the bus altogether, print an error and make sure no one can access this broken bus again (returning -ENOMEM on any attempt to access it). Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") Cc: stable@vger.kernel.org # 3.4+ Reported-by: Dmitry Vyukov Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da..d0250744507a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); -- cgit v1.2.3 From 5ea1320653359dd2ade7ff2ad81e37b790eb1f1f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 3 Mar 2017 11:47:40 -0800 Subject: Input: serio - add fast reconnect option Devices connected to serio bus are quite slow, and to improve apparent speed of resume process, serio core resumes (reconnects) its devices asynchronously, by posting port reconnect requests to a workqueue. Unfortunately this means that if there is a dependent device of a given serio port (for example SMBus part of touchpad connected via both PS/2 and SMBus), we do not have a good way of ensuring resume order. This change allows drivers to define "fast reconnect" handlers that would be called in-line during system resume. Drivers need to ensure that these handlers are truly "fast". Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- include/linux/serio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serio.h b/include/linux/serio.h index c733cff44e18..138a5efe863a 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -77,6 +77,7 @@ struct serio_driver { irqreturn_t (*interrupt)(struct serio *, unsigned char, unsigned int); int (*connect)(struct serio *, struct serio_driver *drv); int (*reconnect)(struct serio *); + int (*fast_reconnect)(struct serio *); void (*disconnect)(struct serio *); void (*cleanup)(struct serio *); -- cgit v1.2.3 From 07de36b378a58f1d1426829acf0ab7cf86f651f3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:32:49 +0300 Subject: clockevents: Fix syntax error in clkevt-of macro The patch fix syntax errors introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of"). Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- include/linux/clockchips.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5d3053c34fb3..6d7edc3082f9 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #ifdef CONFIG_CLKEVT_PROBE extern int clockevent_probe(void); -#els +#else static inline int clockevent_probe(void) { return 0; } #endif -- cgit v1.2.3 From aa0e26bb786b00eaa897e024769e299470815efe Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 29 Mar 2017 00:14:16 -0700 Subject: Input: matrix_keypad - add option to drive inactive columns The gpio-matrix-keypad driver normally sets inactive columns as inputs while scanning. This does not work for all hardware, which may require the inactive columns to be actively driven in order to overcome any pull-ups/downs on the columns. Signed-off-by: David Rivshin Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- include/linux/input/matrix_keypad.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 37b04a0fdea4..6174733a57eb 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -49,6 +49,8 @@ struct matrix_keymap_data { * @wakeup: controls whether the device should be set up as wakeup * source * @no_autorepeat: disable key autorepeat + * @drive_inactive_cols: drive inactive columns during scan, rather than + * making them inputs. * * This structure represents platform-specific data that use used by * matrix_keypad driver to perform proper initialization. @@ -73,6 +75,7 @@ struct matrix_keypad_platform_data { bool active_low; bool wakeup; bool no_autorepeat; + bool drive_inactive_cols; }; int matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, -- cgit v1.2.3 From 597b7305dd8bafdb3aef4957d97128bc90af8e9f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 31 Mar 2017 15:11:47 -0700 Subject: mm: move mm_percpu_wq initialization earlier Yang Li has reported that drain_all_pages triggers a WARN_ON which means that this function is called earlier than the mm_percpu_wq is initialized on arm64 with CMA configured: WARNING: CPU: 2 PID: 1 at mm/page_alloc.c:2423 drain_all_pages+0x244/0x25c Modules linked in: CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-next-20170310-00027-g64dfbc5 #127 Hardware name: Freescale Layerscape 2088A RDB Board (DT) task: ffffffc07c4a6d00 task.stack: ffffffc07c4a8000 PC is at drain_all_pages+0x244/0x25c LR is at start_isolate_page_range+0x14c/0x1f0 [...] drain_all_pages+0x244/0x25c start_isolate_page_range+0x14c/0x1f0 alloc_contig_range+0xec/0x354 cma_alloc+0x100/0x1fc dma_alloc_from_contiguous+0x3c/0x44 atomic_pool_init+0x7c/0x208 arm64_dma_init+0x44/0x4c do_one_initcall+0x38/0x128 kernel_init_freeable+0x1a0/0x240 kernel_init+0x10/0xfc ret_from_fork+0x10/0x20 Fix this by moving the whole setup_vmstat which is an initcall right now to init_mm_internals which will be called right after the WQ subsystem is initialized. Link: http://lkml.kernel.org/r/20170315164021.28532-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Yang Li Tested-by: Yang Li Tested-by: Xiaolong Ye Cc: Mel Gorman Cc: Vlastimil Babka Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f01c88f0800..00a8fa7e366a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,8 @@ struct user_struct; struct writeback_control; struct bdi_writeback; +void init_mm_internals(void); + #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; -- cgit v1.2.3 From 553af430e7c981e6e8fa5007c5b7b5773acc63dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:50 -0700 Subject: mm: rmap: fix huge file mmap accounting in the memcg stats Huge pages are accounted as single units in the memcg's "file_mapped" counter. Account the correct number of base pages, like we do in the corresponding node counter. Link: http://lkml.kernel.org/r/20170322005111.3156-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5af377303880..bb7250c45cb8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { -- cgit v1.2.3 From b0845ce58379d11dcad4cdb6824a6410de260216 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5734480c9590..a5c7046f26b4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -76,6 +76,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} -- cgit v1.2.3 From 00a06c22e9fc0a33ae0b6ca2d47938340dbcd539 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 4 Mar 2017 11:29:34 -0800 Subject: i2c: export i2c_client_type structure i2c bus has 2 different types of device belonging to the same bus and bus notifiers use device type to distinguish between adapters and clients. Previously we only had i2c_adapter_type exported, which made code wanting to work with i2c_client devices test for type not equal to adapter type. This unfortunately is not safe if we ever add another type to the bus, so let's export i2c_client_type as well. Reviewed-by: Jean Delvare Acked-by: Benjamin Tissoires Reviewed-by: Wolfram Sang Signed-off-by: Dmitry Torokhov --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 6b183521c616..6366989d1001 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -38,6 +38,7 @@ extern struct bus_type i2c_bus_type; extern struct device_type i2c_adapter_type; +extern struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ -- cgit v1.2.3 From 4124c4eba40256b65acb5016a1edfdd59a1960b6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Mar 2017 11:45:51 -0800 Subject: i2c: allow attaching IRQ resources to i2c_board_info Simple integer for interrupt number is not expressive enough, as it does not convey interrupt trigger type that should be used. Let's allow attaching array of resources to the board info and have i2c core parse first IRQ resource and set up interrupt trigger as needed. Reviewed-by: Wolfram Sang Signed-off-by: Dmitry Torokhov --- include/linux/i2c.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 6366989d1001..c5bd8b8daf97 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -304,6 +304,8 @@ static inline int i2c_slave_event(struct i2c_client *client, * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @properties: additional device properties for the device + * @resources: resources associated with the device + * @num_resources: number of resources in the @resources array * @irq: stored in i2c_client.irq * * I2C doesn't actually support hardware probing, although controllers and @@ -326,6 +328,8 @@ struct i2c_board_info { struct device_node *of_node; struct fwnode_handle *fwnode; const struct property_entry *properties; + const struct resource *resources; + unsigned int num_resources; int irq; }; -- cgit v1.2.3 From e6eba3fac9a0eb2018a85505b91740e27c60fdba Mon Sep 17 00:00:00 2001 From: Rajat Jain Date: Mon, 3 Apr 2017 11:53:41 -0700 Subject: Input: cros_ec_keyb - add an EC event for sysrq Some form factors (detachables/tablets) may not have a keyboard and thus user may have to resort to using a defined EC UI to send sysrq(s) to the kernel in order to collect crash info etc. This UI typically is in the form of user pressing volume / power buttons in some specific sequence and for some specific time. Add a new EC event that allows EC to communicate the sysrq to the AP. (We're skipping event number 5 because it has been reserved for something else) Signed-off-by: Rajat Jain Acked-by: Lee Jones Signed-off-by: Dmitry Torokhov --- include/linux/mfd/cros_ec_commands.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index da1c188562bc..3ceebf6b9afb 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2040,6 +2040,9 @@ enum ec_mkbp_event { /* The state of the switches have changed. */ EC_MKBP_EVENT_SWITCH = 4, + /* EC sent a sysrq command */ + EC_MKBP_EVENT_SYSRQ = 6, + /* Number of MKBP events */ EC_MKBP_EVENT_COUNT, }; @@ -2052,6 +2055,7 @@ union ec_response_get_next_data { uint32_t buttons; uint32_t switches; + uint32_t sysrq; } __packed; struct ec_response_get_next_event { -- cgit v1.2.3 From d99caa472c0a28dc95dd9b98c30ee46f9755181f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 19 Feb 2017 17:21:56 -0800 Subject: Input: eeti_ts - switch to gpiod API gpiod API allows standard way of specifying GPIO polarity and takes it into account when reading or setting GPIO state. It also allows us to switch to common way of obtaining GPIO descriptor and away form legacy platform data. Reviewed-by: Daniel Mack Signed-off-by: Dmitry Torokhov --- include/linux/input/eeti_ts.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/input/eeti_ts.h (limited to 'include/linux') diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h deleted file mode 100644 index 16625d799b6f..000000000000 --- a/include/linux/input/eeti_ts.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef LINUX_INPUT_EETI_TS_H -#define LINUX_INPUT_EETI_TS_H - -struct eeti_ts_platform_data { - int irq_gpio; - unsigned int irq_active_high; -}; - -#endif /* LINUX_INPUT_EETI_TS_H */ - -- cgit v1.2.3