From e69f61862ab833e9b8d3c15b6ce07fd69f3bfecc Mon Sep 17 00:00:00 2001 From: Yacine Belkadi Date: Fri, 12 Jul 2013 20:45:47 +0200 Subject: sched: Fix some kernel-doc warnings When building the htmldocs (in verbose mode), scripts/kernel-doc reports the follwing type of warnings: Warning(kernel/sched/core.c:936): No description found for return value of 'task_curr' ... Fix those by: - adding the missing descriptions - using "Return" sections for the descriptions Signed-off-by: Yacine Belkadi Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1373654747-2389-1-git-send-email-yacine.belkadi.1@gmail.com [ While at it, fix the cpupri_set() explanation. ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..82300247974c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1532,6 +1532,8 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) * Test if a process is not yet dead (at most zombie state) * If pid_alive fails, then pointers within the task structure * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. */ static inline int pid_alive(struct task_struct *p) { @@ -1543,6 +1545,8 @@ static inline int pid_alive(struct task_struct *p) * @tsk: Task structure to be checked. * * Check if a task structure is the first user space task the kernel created. + * + * Return: 1 if the task structure is init. 0 otherwise. */ static inline int is_global_init(struct task_struct *tsk) { @@ -1893,6 +1897,8 @@ extern struct task_struct *idle_task(int cpu); /** * is_idle_task - is the specified task an idle task? * @p: the task in question. + * + * Return: 1 if @p is an idle task. 0 otherwise. */ static inline bool is_idle_task(const struct task_struct *p) { -- cgit v1.2.3 From b01a60be7a4a161ac0a11df30569d21a20795aef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 5 Jul 2013 17:43:56 +0200 Subject: ssb: fix alignment of struct bcma_device_id The ARM OABI and EABI disagree on the alignment of structures with small members, so module init tools may interpret the ssb device table incorrectly, as shown by this warning when building the b43 device driver in an OABI kernel: FATAL: drivers/net/wireless/b43/b43: sizeof(struct ssb_device_id)=6 is not a modulo of the size of section __mod_ssb_device_table=88. Forcing the default (EABI) alignment on the structure makes this problem go away. Since the ssb_device_id may have the same problem, better fix both structures. Signed-off-by: Arnd Bergmann Cc: Russell King Cc: John W. Linville Cc: Michael Buesch Cc: Larry Finger Signed-off-by: John W. Linville --- include/linux/mod_devicetable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index b62d4af6c667..45e921401b06 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -361,7 +361,8 @@ struct ssb_device_id { __u16 vendor; __u16 coreid; __u8 revision; -}; + __u8 __pad; +} __attribute__((packed, aligned(2))); #define SSB_DEVICE(_vendor, _coreid, _revision) \ { .vendor = _vendor, .coreid = _coreid, .revision = _revision, } #define SSB_DEVTABLE_END \ @@ -377,7 +378,7 @@ struct bcma_device_id { __u16 id; __u8 rev; __u8 class; -}; +} __attribute__((packed,aligned(2))); #define BCMA_CORE(_manuf, _id, _rev, _class) \ { .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, } #define BCMA_CORETABLE_END \ -- cgit v1.2.3 From a1a8e1dc111d6f05e7164e851e58219d428359e1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 16 Jul 2013 15:28:00 +0100 Subject: iio:trigger: Fix use_count race condition When using more than one trigger consumer it can happen that multiple threads perform a read-modify-update cycle on 'use_count' concurrently. This can cause updates to be lost and use_count can get stuck at non-zero value, in which case the IIO core assumes that at least one thread is still running and will wait for it to finish before running any trigger handlers again. This effectively renders the trigger disabled and a reboot is necessary before it can be used again. To fix this make use_count an atomic variable. Also set it to the number of consumers before starting the first consumer, otherwise it might happen that use_count drops to 0 even though not all consumers have been run yet. Signed-off-by: Lars-Peter Clausen Tested-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 3869c525b052..369cf2cd5144 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -8,6 +8,7 @@ */ #include #include +#include #ifndef _IIO_TRIGGER_H_ #define _IIO_TRIGGER_H_ @@ -61,7 +62,7 @@ struct iio_trigger { struct list_head list; struct list_head alloc_list; - int use_count; + atomic_t use_count; struct irq_chip subirq_chip; int subirq_base; -- cgit v1.2.3 From b1451e546899bc8f450773b2af02e0cd000cf1fa Mon Sep 17 00:00:00 2001 From: "Patil, Rachna" Date: Sat, 20 Jul 2013 17:27:00 +0100 Subject: iio: ti_am335x_adc: Fix wrong samples received on 1st read Previously we tried to read data form ADC even before ADC sequencer finished sampling. This led to wrong samples. We now wait on ADC status register idle bit to be set. Signed-off-by: Patil, Rachna Signed-off-by: Zubair Lutfullah Signed-off-by: Jonathan Cameron --- include/linux/mfd/ti_am335x_tscadc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 8d73fe29796a..db1791bb997a 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -113,11 +113,27 @@ #define CNTRLREG_8WIRE CNTRLREG_AFE_CTRL(3) #define CNTRLREG_TSCENB BIT(7) +/* FIFO READ Register */ +#define FIFOREAD_DATA_MASK (0xfff << 0) +#define FIFOREAD_CHNLID_MASK (0xf << 16) + +/* Sequencer Status */ +#define SEQ_STATUS BIT(5) + #define ADC_CLK 3000000 #define MAX_CLK_DIV 7 #define TOTAL_STEPS 16 #define TOTAL_CHANNELS 8 +/* +* ADC runs at 3MHz, and it takes +* 15 cycles to latch one data output. +* Hence the idle time for ADC to +* process one sample data would be +* around 5 micro seconds. +*/ +#define IDLE_TIMEOUT 5 /* microsec */ + #define TSCADC_CELLS 2 struct ti_tscadc_dev { -- cgit v1.2.3 From 81913283c80be8c0b7e038c26e2a611ab38394f1 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 28 Jun 2013 05:44:22 -0300 Subject: [media] v4l2: added missing mutex.h include to v4l2-ctrls.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes following error: include/media/v4l2-ctrls.h:193:15: error: field ‘_lock’ has incomplete type include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_lock’: include/media/v4l2-ctrls.h:570:2: error: implicit declaration of function ‘mutex_lock’ [-Werror=implicit-function-declaration] include/media/v4l2-ctrls.h: In function ‘v4l2_ctrl_unlock’: include/media/v4l2-ctrls.h:579:2: error: implicit declaration of function ‘mutex_unlock’ [-Werror=implicit-function-declaration] Signed-off-by: Andrzej Hajda Signed-off-by: Kyungmin Park Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-ctrls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index 7343a27fe819..47ada23345a1 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -22,6 +22,7 @@ #define _V4L2_CTRLS_H #include +#include #include /* forward references */ -- cgit v1.2.3 From 0699a73af3811b66b1ab5650575acee5eea841ab Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 22 Jul 2013 21:32:09 +0200 Subject: firewire: fix libdc1394/FlyCap2 iso event regression Commit 18d627113b83 (firewire: prevent dropping of completed iso packet header data) was intended to be an obvious bug fix, but libdc1394 and FlyCap2 depend on the old behaviour by ignoring all returned information and thus not noticing that not all packets have been received yet. The result was that the video frame buffers would be saved before they contained the correct data. Reintroduce the old behaviour for old clients. Tested-by: Stepan Salenikovich Tested-by: Josep Bosch Cc: # 3.4+ Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- include/linux/firewire.h | 1 + include/uapi/linux/firewire-cdev.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 3b0e820375ab..5d7782e42b8f 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,7 @@ struct fw_iso_context { int type; int channel; int speed; + bool drop_overflow_headers; size_t header_size; union { fw_iso_callback_t sc; diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index d50036953497..1db453e4b550 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -215,8 +215,8 @@ struct fw_cdev_event_request2 { * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets * without the interrupt bit set that the kernel's internal buffer for @header - * is about to overflow. (In the last case, kernels with ABI version < 5 drop - * header data up to the next interrupt packet.) + * is about to overflow. (In the last case, ABI versions < 5 drop header data + * up to the next interrupt packet.) * * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT): * -- cgit v1.2.3 From 148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Jul 2013 01:41:34 +0200 Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode" Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for repeat mode), because it has been identified as the source of a significant performance regression in v3.8 and later as explained by Jeremy Eder: We believe we've identified a particular commit to the cpuidle code that seems to be impacting performance of variety of workloads. The simplest way to reproduce is using netperf TCP_RR test, so we're using that, on a pair of Sandy Bridge based servers. We also have data from a large database setup where performance is also measurably/positively impacted, though that test data isn't easily share-able. Included below are test results from 3 test kernels: kernel reverts ----------------------------------------------------------- 1) vanilla upstream (no reverts) 2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c 3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 e11538d1f03914eb92af5a1a378375c05ae8520c In summary, netperf TCP_RR numbers improve by approximately 4% after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency never seems to get above 40%. Taking that patch out gets C0 near 100% quite often, and performance increases. The below data are histograms representing the %c0 residency @ 1-second sample rates (using turbostat), while under netperf test. - If you look at the first 4 histograms, you can see %c0 residency almost entirely in the 30,40% bin. - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4, shows %c0 in the 80,90,100% bins. Below each kernel name are netperf TCP_RR trans/s numbers for the particular kernel that can be disclosed publicly, comparing the 3 test kernels. We ran a 4th test with the vanilla kernel where we've also set /dev/cpu_dma_latency=0 to show overall impact boosting single-threaded TCP_RR performance over 11% above baseline. 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): TCP_RR trans/s 54323.78 ----------------------------------------------------------- 3.10-rc2 vanilla RX (no reverts) TCP_RR trans/s 48192.47 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 1]: * 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 49]: ************************************************* 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 perfteam2 RX (reverts commit e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 49698.69 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 59]: *********************************************************** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: Sender %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 2]: ** 40.0000 - 50.0000 [ 58]: ********************************************************** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 0]: ----------------------------------------------------------- 3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 and e11538d1f03914eb92af5a1a378375c05ae8520c) TCP_RR trans/s 47766.95 Receiver %c0 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 1]: * 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 27]: *************************** 40.0000 - 50.0000 [ 2]: ** 50.0000 - 60.0000 [ 0]: 60.0000 - 70.0000 [ 2]: ** 70.0000 - 80.0000 [ 0]: 80.0000 - 90.0000 [ 0]: 90.0000 - 100.0000 [ 28]: **************************** Sender: 0.0000 - 10.0000 [ 1]: * 10.0000 - 20.0000 [ 0]: 20.0000 - 30.0000 [ 0]: 30.0000 - 40.0000 [ 11]: *********** 40.0000 - 50.0000 [ 0]: 50.0000 - 60.0000 [ 1]: * 60.0000 - 70.0000 [ 0]: 70.0000 - 80.0000 [ 3]: *** 80.0000 - 90.0000 [ 7]: ******* 90.0000 - 100.0000 [ 38]: ************************************** These results demonstrate gaining back the tendency of the CPU to stay in more responsive, performant C-states (and thus yield measurably better performance), by reverting commit 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. Requested-by: Jeremy Eder Tested-by: Len Brown Cc: 3.8+ Signed-off-by: Rafael J. Wysocki --- include/linux/tick.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9180f4b85e6d..62bd8b72873c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) { } #endif -# ifdef CONFIG_CPU_IDLE_GOV_MENU -extern void menu_hrtimer_cancel(void); -# else -static inline void menu_hrtimer_cancel(void) {} -# endif /* CONFIG_CPU_IDLE_GOV_MENU */ - #endif -- cgit v1.2.3 From 2b44c4db2e2f1765d35163a861d301038e0c8a75 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 24 Jul 2013 17:41:33 -0700 Subject: freezer: set PF_SUSPEND_TASK flag on tasks that call freeze_processes Calling freeze_processes sets a global flag that will cause any process that calls try_to_freeze to enter the refrigerator. It skips sending a signal to the current task, but if the current task ever hits try_to_freeze, all threads will be frozen and the system will deadlock. Set a new flag, PF_SUSPEND_TASK, on the task that calls freeze_processes. The flag notifies the freezer that the thread is involved in suspend and should not be frozen. Also add a WARN_ON in thaw_processes if the caller does not have the PF_SUSPEND_TASK flag set to catch if a different task calls thaw_processes than the one that called freeze_processes, leaving a task with PF_SUSPEND_TASK permanently set on it. Threads that spawn off a task with PF_SUSPEND_TASK set (which swsusp does) will also have PF_SUSPEND_TASK set, preventing them from freezing while they are helping with suspend, but they need to be dead by the time suspend is triggered, otherwise they may run when userspace is expected to be frozen. Add a WARN_ON in thaw_processes if more than one thread has the PF_SUSPEND_TASK flag set. Reported-and-tested-by: Michael Leun Signed-off-by: Colin Cross Signed-off-by: Rafael J. Wysocki --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..d722490da030 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1628,6 +1628,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other -- cgit v1.2.3 From a838834b2f7cbc09b6319a1fc332c03e4d665b20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 30 Jul 2013 16:43:55 -0400 Subject: drm: fix 64 bit drm fixed point helpers Sign bit wasn't handled properly and a small typo. Thanks to Christian for helping me sort this out. Signed-off-by: Alex Deucher --- include/drm/drm_fixed.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index f5e1168c7647..d639049a613d 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -84,12 +84,12 @@ static inline int drm_fixp2int(int64_t a) return ((s64)a) >> DRM_FIXED_POINT; } -static inline s64 drm_fixp_msbset(int64_t a) +static inline unsigned drm_fixp_msbset(int64_t a) { unsigned shift, sign = (a >> 63) & 1; for (shift = 62; shift > 0; --shift) - if ((a >> shift) != sign) + if (((a >> shift) & 1) != sign) return shift; return 0; @@ -100,9 +100,9 @@ static inline s64 drm_fixp_mul(s64 a, s64 b) unsigned shift = drm_fixp_msbset(a) + drm_fixp_msbset(b); s64 result; - if (shift > 63) { - shift = shift - 63; - a >>= shift >> 1; + if (shift > 61) { + shift = shift - 61; + a >>= (shift >> 1) + (shift & 1); b >>= shift >> 1; } else shift = 0; @@ -120,7 +120,7 @@ static inline s64 drm_fixp_mul(s64 a, s64 b) static inline s64 drm_fixp_div(s64 a, s64 b) { - unsigned shift = 63 - drm_fixp_msbset(a); + unsigned shift = 62 - drm_fixp_msbset(a); s64 result; a <<= shift; @@ -154,7 +154,7 @@ static inline s64 drm_fixp_exp(s64 x) } if (x < 0) - sum = drm_fixp_div(1, sum); + sum = drm_fixp_div(DRM_FIXED_ONE, sum); return sum; } -- cgit v1.2.3 From 9ea7187c53f63e31f2d1b2b1e474e31808565009 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 31 Jul 2013 01:19:43 +0200 Subject: NFC: netlink: Rename CMD_FW_UPLOAD to CMD_FW_DOWNLOAD Loading a firmware into a target is typically called firmware download, not firmware upload. So we rename the netlink API to NFC_CMD_FW_DOWNLOAD in order to avoid any terminology confusion from userspace. Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 +- include/net/nfc/nfc.h | 4 ++-- include/uapi/linux/nfc.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 0af851c3b038..b64b7bce4b94 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -59,7 +59,7 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, struct sk_buff *skb); - int (*fw_upload)(struct nfc_hci_dev *hdev, const char *firmware_name); + int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx); int (*disable_se)(struct nfc_hci_dev *dev, u32 se_idx); diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 0e353f1658bb..5f286b726bb6 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -68,7 +68,7 @@ struct nfc_ops { void *cb_context); int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); - int (*fw_upload)(struct nfc_dev *dev, const char *firmware_name); + int (*fw_download)(struct nfc_dev *dev, const char *firmware_name); /* Secure Element API */ int (*discover_se)(struct nfc_dev *dev); @@ -127,7 +127,7 @@ struct nfc_dev { int targets_generation; struct device dev; bool dev_up; - bool fw_upload_in_progress; + bool fw_download_in_progress; u8 rf_mode; bool polling; struct nfc_target *active_target; diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index caed0f324d5f..8137dd8d2adf 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -69,8 +69,8 @@ * starting a poll from a device which has a secure element enabled means * we want to do SE based card emulation. * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element. - * @NFC_CMD_FW_UPLOAD: Request to Load/flash firmware, or event to inform that - * some firmware was loaded + * @NFC_CMD_FW_DOWNLOAD: Request to Load/flash firmware, or event to inform + * that some firmware was loaded */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -94,7 +94,7 @@ enum nfc_commands { NFC_CMD_DISABLE_SE, NFC_CMD_LLC_SDREQ, NFC_EVENT_LLC_SDRES, - NFC_CMD_FW_UPLOAD, + NFC_CMD_FW_DOWNLOAD, NFC_EVENT_SE_ADDED, NFC_EVENT_SE_REMOVED, /* private: internal use only */ -- cgit v1.2.3 From 2816c551c796ec14620325b2c9ed75b9979d3125 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 29 Jul 2013 19:50:33 +0200 Subject: tracing: trace_remove_event_call() should fail if call/file is in use Change trace_remove_event_call(call) to return the error if this call is active. This is what the callers assume but can't verify outside of the tracing locks. Both trace_kprobe.c/trace_uprobe.c need the additional changes, unregister_trace_probe() should abort if trace_remove_event_call() fails. The caller is going to free this call/file so we must ensure that nobody can use them after trace_remove_event_call() succeeds. debugfs should be fine after the previous changes and event_remove() does TRACE_REG_UNREGISTER, but still there are 2 reasons why we need the additional checks: - There could be a perf_event(s) attached to this tp_event, so the patch checks ->perf_refcount. - TRACE_REG_UNREGISTER can be suppressed by FTRACE_EVENT_FL_SOFT_MODE, so we simply check FTRACE_EVENT_FL_ENABLED protected by event_mutex. Link: http://lkml.kernel.org/r/20130729175033.GB26284@redhat.com Reviewed-by: Masami Hiramatsu Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4372658c73ae..f98ab063e95e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -332,7 +332,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); -extern void trace_remove_event_call(struct ftrace_event_call *call); +extern int trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) -- cgit v1.2.3 From cd23b14b654769db83c9684ae1ba32c0e066670f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 18 Jul 2013 15:31:08 +0300 Subject: mlx5_core: Implement new initialization sequence Introduce enbale_hca and disable_hca commands to signify when the driver starts or ceases to operate on the device. In addition the driver will use boot and init pages count; boot pages is required to allow firmware to complete boot commands and the other to complete init hca. Command interface revision is bumped to 4 to enforce using supported firmware. This patch breaks compatibility with old versions of firmware (< 4); however, the first GA firmware we will publish will support version 4 so this should not be a problem. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 20 ++++++++++++++++++++ include/linux/mlx5/driver.h | 4 +++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8de8d8f22384..737685e9e852 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -690,6 +690,26 @@ struct mlx5_query_cq_mbox_out { __be64 pas[0]; }; +struct mlx5_enable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_enable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_disable_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + struct mlx5_eq_context { u8 status; u8 ec_oi; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f22e4419839b..2aa258b0ced1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -101,6 +101,8 @@ enum { MLX5_CMD_OP_QUERY_ADAPTER = 0x101, MLX5_CMD_OP_INIT_HCA = 0x102, MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_ENABLE_HCA = 0x104, + MLX5_CMD_OP_DISABLE_HCA = 0x105, MLX5_CMD_OP_QUERY_PAGES = 0x107, MLX5_CMD_OP_MANAGE_PAGES = 0x108, MLX5_CMD_OP_SET_HCA_CAP = 0x109, @@ -690,7 +692,7 @@ int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s16 npages); -int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); void mlx5_unregister_debugfs(void); -- cgit v1.2.3 From 22f2020f84c6da2dd0acb2dce12e39e59ff7c8be Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:48 -0700 Subject: vmpressure: change vmpressure::sr_lock to spinlock There is nothing that can sleep inside critical sections protected by this lock and those sections are really small so there doesn't make much sense to use mutex for them. Change the log to a spinlock Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Reviewed-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077340ea..2081680e015d 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -12,7 +12,7 @@ struct vmpressure { unsigned long scanned; unsigned long reclaimed; /* The lock is used to keep the scanned/reclaimed above in sync. */ - struct mutex sr_lock; + struct spinlock sr_lock; /* The list of vmpressure_event structs. */ struct list_head events; -- cgit v1.2.3 From 33cb876e947b9ddda8dca3fb99234b743a597ef9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:51 -0700 Subject: vmpressure: make sure there are no events queued after memcg is offlined vmpressure is called synchronously from reclaim where the target_memcg is guaranteed to be alive but the eventfd is signaled from the work queue context. This means that memcg (along with vmpressure structure which is embedded into it) might go away while the work item is pending which would result in use-after-release bug. We have two possible ways how to fix this. Either vmpressure pins memcg before it schedules vmpr->work and unpin it in vmpressure_work_fn or explicitely flush the work item from the css_offline context (as suggested by Tejun). This patch implements the later one and it introduces vmpressure_cleanup which flushes the vmpressure work queue item item. It hooks into mem_cgroup_css_offline after the memcg itself is cleaned up. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 2081680e015d..7dc17e2456de 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -30,6 +30,7 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); +extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); -- cgit v1.2.3 From d9d10a30964504af834d8d250a0c76d4ae91eb1e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 30 Jul 2013 10:31:00 -0700 Subject: ndisc: Add missing inline to ndisc_addr_option_pad Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 949d77528f2f..6fea32340ae8 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -119,7 +119,7 @@ extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may * also need a pad of 2. */ -static int ndisc_addr_option_pad(unsigned short type) +static inline int ndisc_addr_option_pad(unsigned short type) { switch (type) { case ARPHRD_INFINIBAND: return 2; -- cgit v1.2.3 From 2ac3ac8f86f2fe065d746d9a9abaca867adec577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention On a high-traffic router with many processors and many IPv6 dst entries, soft lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2a601e7da1bf..48ec25a7fcb6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -300,7 +300,7 @@ extern void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); extern void fib6_run_gc(unsigned long expires, - struct net *net); + struct net *net, bool force); extern void fib6_gc_cleanup(void); -- cgit v1.2.3 From dfcefb0be1231982784df2152213103ad33c1cfd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:24 +0800 Subject: net: fix a compile error when CONFIG_NET_LL_RX_POLL is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_NET_LL_RX_POLL is not set, I got: net/socket.c: In function ‘sock_poll’: net/socket.c:1165:4: error: implicit declaration of function ‘sk_busy_loop’ [-Werror=implicit-function-declaration] Fix this by adding a nop when !CONFIG_NET_LL_RX_POLL. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index a14339c2985f..6cd8848fec68 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -181,5 +181,10 @@ static inline bool busy_loop_timeout(unsigned long end_time) return true; } +static inline bool sk_busy_loop(struct sock *sk, int nonblock) +{ + return false; +} + #endif /* CONFIG_NET_LL_RX_POLL */ #endif /* _LINUX_NET_BUSY_POLL_H */ -- cgit v1.2.3 From e0d1095ae3405404d247afb00233ef837d58da83 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Aug 2013 11:10:25 +0800 Subject: net: rename CONFIG_NET_LL_RX_POLL to CONFIG_NET_RX_BUSY_POLL Eliezer renames several *ll_poll to *busy_poll, but forgets CONFIG_NET_LL_RX_POLL, so in case of confusion, rename it too. Cc: Eliezer Tamir Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 2 +- include/net/busy_poll.h | 6 +++--- include/net/sock.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0741a1e919a5..9a4156845e93 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -973,7 +973,7 @@ struct net_device_ops { gfp_t gfp); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5afefa01a13c..3b71a4e83642 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,7 +501,7 @@ struct sk_buff { /* 7/9 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); -#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL +#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL union { unsigned int napi_id; dma_cookie_t dma_cookie; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 6cd8848fec68..f18b91966d3d 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -27,7 +27,7 @@ #include #include -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; @@ -146,7 +146,7 @@ static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb) sk->sk_napi_id = skb->napi_id; } -#else /* CONFIG_NET_LL_RX_POLL */ +#else /* CONFIG_NET_RX_BUSY_POLL */ static inline unsigned long net_busy_loop_on(void) { return 0; @@ -186,5 +186,5 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) return false; } -#endif /* CONFIG_NET_LL_RX_POLL */ +#endif /* CONFIG_NET_RX_BUSY_POLL */ #endif /* _LINUX_NET_BUSY_POLL_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 95a5a2c6925a..31d5cfbb51ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -327,7 +327,7 @@ struct sock { #ifdef CONFIG_RPS __u32 sk_rxhash; #endif -#ifdef CONFIG_NET_LL_RX_POLL +#ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_napi_id; unsigned int sk_ll_usec; #endif -- cgit v1.2.3 From ed5467da0e369e65b247b99eb6403cb79172bcda Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Fri, 2 Aug 2013 21:16:43 +0400 Subject: tracing: Fix fields of struct trace_iterator that are zeroed by mistake tracing_read_pipe zeros all fields bellow "seq". The declaration contains a comment about that, but it doesn't help. The first field is "snapshot", it's true when current open file is snapshot. Looks obvious, that it should not be zeroed. The second field is "started". It was converted from cpumask_t to cpumask_var_t (v2.6.28-4983-g4462344), in other words it was converted from cpumask to pointer on cpumask. Currently the reference on "started" memory is lost after the first read from tracing_read_pipe and a proper object will never be freed. The "started" is never dereferenced for trace_pipe, because trace_pipe can't have the TRACE_FILE_ANNOTATE options. Link: http://lkml.kernel.org/r/1375463803-3085183-1-git-send-email-avagin@openvz.org Cc: stable@vger.kernel.org # 2.6.30 Signed-off-by: Andrew Vagin Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f98ab063e95e..120d57a1c3a5 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -78,6 +78,11 @@ struct trace_iterator { /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; + cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; + /* The below is zeroed out in pipe_read */ struct trace_seq seq; struct trace_entry *ent; @@ -90,10 +95,7 @@ struct trace_iterator { loff_t pos; long idx; - cpumask_var_t started; - - /* it's true when current open file is snapshot */ - bool snapshot; + /* All new field here will be zeroed out in pipe_read */ }; enum trace_iter_flags { -- cgit v1.2.3 From e67bc51e574ffe3c4bc1e09cab7658b1e780b4ce Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Fri, 2 Aug 2013 14:47:29 -0400 Subject: tracing: Fix trace_dump_stack() proto when CONFIG_TRACING is not set When CONFIG_TRACING is not enabled, the stub prototype for trace_dump_stack() is incorrect. It has (void) when it should be (int). Link: http://lkml.kernel.org/r/CAPhKKr_H=ukFnBL4WgDOVT5ay2xeF-Ho+CA0DWZX0E2JW-=vSQ@mail.gmail.com Signed-off-by: Dhaval Giani Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3bef14c6586b..482ad2d84a32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -629,7 +629,7 @@ extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } -static inline void trace_dump_stack(void) { } +static inline void trace_dump_stack(int skip) { } static inline void tracing_on(void) { } static inline void tracing_off(void) { } -- cgit v1.2.3 From bf37d2b3fdc9e451f9e376a3922ac8df5aa24128 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Sun, 4 Aug 2013 12:55:48 +0300 Subject: busy_poll: cleanup do-nothing placeholders When renaming ll_poll to busy poll, I introduced a typo in the name of the do-nothing placeholder for sk_busy_loop and called it sk_busy_poll. This broke compile when busy poll was not configured. Cong Wang submitted a patch to fixed that. This patch removes the now redundant, misspelled placeholder. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f18b91966d3d..8e2dfc106aed 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -162,11 +162,6 @@ static inline bool sk_can_busy_loop(struct sock *sk) return false; } -static inline bool sk_busy_poll(struct sock *sk, int nonblock) -{ - return false; -} - static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) { -- cgit v1.2.3 From 7d46daba8dd5df1aa45724518a041ef7163d3ad5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 5 Aug 2013 16:05:32 +0300 Subject: mlx5: remove health handler plugin Remove this code, per Dave Miller's request, since it is not being used anywhere in the kernel. Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2aa258b0ced1..611e65e76b00 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -731,9 +731,6 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); -typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size); -int mlx5_register_health_report_handler(health_handler_t handler); -void mlx5_unregister_health_report_handler(void); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 007ccfcf89401e764c33965b739310d86a94626d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Aug 2013 14:32:54 +0200 Subject: ACPI: Drop physical_node_id_bitmap from struct acpi_device The physical_node_id_bitmap in struct acpi_device is only used for looking up the first currently unused dependent phyiscal node ID by acpi_bind_one(). It is not really necessary, however, because acpi_bind_one() walks the entire physical_node_list of the given device object for sanity checking anyway and if that list is always sorted by node_id, it is straightforward to find the first gap between the currently used node IDs and use that number as the ID of the new list node. This also removes the artificial limit of the maximum number of dependent physical devices per ACPI device object, which now depends only on the capacity of unsigend int. As a result, it fixes a regression introduced by commit e2ff394 (ACPI / memhotplug: Bind removable memory blocks to ACPI device nodes) that caused acpi_memory_enable_device() to fail when the number of 128 MB blocks within one removable memory module was greater than 32. Reported-and-tested-by: Yasuaki Ishimatsu Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani Reviewed-by: Yasuaki Ishimatsu --- include/acpi/acpi_bus.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 56e6b68c8d2f..5026aaa35133 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -274,15 +274,12 @@ struct acpi_device_wakeup { }; struct acpi_device_physical_node { - u8 node_id; + unsigned int node_id; struct list_head node; struct device *dev; bool put_online:1; }; -/* set maximum of physical nodes to 32 for expansibility */ -#define ACPI_MAX_PHYSICAL_NODE 32 - /* Device */ struct acpi_device { int device_type; @@ -302,10 +299,9 @@ struct acpi_device { struct acpi_driver *driver; void *driver_data; struct device dev; - u8 physical_node_count; + unsigned int physical_node_count; struct list_head physical_node_list; struct mutex physical_node_lock; - DECLARE_BITMAP(physical_node_id_bitmap, ACPI_MAX_PHYSICAL_NODE); struct list_head power_dependent; void (*remove)(struct acpi_device *); }; -- cgit v1.2.3 From 49ccc142f9cbc33fdda18e8fa90c1c5b4a79c0ad Mon Sep 17 00:00:00 2001 From: Mateusz Krawczuk Date: Tue, 6 Aug 2013 18:34:40 +0200 Subject: regmap: Add missing header for !CONFIG_REGMAP stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regmap.h requires linux/err.h if CONFIG_REGMAP is not defined. Without it I get error. CC drivers/media/platform/exynos4-is/fimc-reg.o In file included from drivers/media/platform/exynos4-is/fimc-reg.c:14:0: include/linux/regmap.h: In function ‘regmap_write’: include/linux/regmap.h:525:10: error: ‘EINVAL’ undeclared (first use in this function) include/linux/regmap.h:525:10: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Mateusz Krawczuk Signed-off-by: Kyungmin Park Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 75981d0b57dc..580a5320cc96 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -15,6 +15,7 @@ #include #include +#include struct module; struct device; -- cgit v1.2.3 From 60f75b8e97daf4a39790a20d962cb861b9220af5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Aug 2013 22:55:00 +0200 Subject: ACPI: Try harder to resolve _ADR collisions for bridges In theory, under a given ACPI namespace node there should be only one child device object with _ADR whose value matches a given bus address exactly. In practice, however, there are systems in which multiple child device objects under a given parent have _ADR matching exactly the same address. In those cases we use _STA to determine which of the multiple matching devices is enabled, since some systems are known to indicate which ACPI device object to associate with the given physical (usually PCI) device this way. Unfortunately, as it turns out, there are systems in which many device objects under the same parent have _ADR matching exactly the same bus address and none of them has _STA, in which case they all should be regarded as enabled according to the spec. Still, if those device objects are supposed to represent bridges (e.g. this is the case for device objects corresponding to PCIe ports), we can try harder and skip the ones that have no child device objects in the ACPI namespace. With luck, we can avoid using device objects that we are not expected to use this way. Although this only works for bridges whose children also have ACPI namespace representation, it is sufficient to address graphics adapter detection issues on some systems, so rework the code finding a matching device ACPI handle for a given bus address to implement this idea. Introduce a new function, acpi_find_child(), taking three arguments: the ACPI handle of the device's parent, a bus address suitable for the device's bus type and a bool indicating if the device is a bridge and make it work as outlined above. Reimplement the function currently used for this purpose, acpi_get_child(), as a call to acpi_find_child() with the last argument set to 'false' and make the PCI subsystem use acpi_find_child() with the bridge information passed as the last argument to it. [Lan Tianyu notices that it is not sufficient to use pci_is_bridge() for that, because the device's subordinate pointer hasn't been set yet at this point, so use hdr_type instead.] This change fixes a regression introduced inadvertently by commit 33f767d (ACPI: Rework acpi_get_child() to be more efficient) which overlooked the fact that for acpi_walk_namespace() "post-order" means "after all children have been visited" rather than "on the way back", so for device objects without children and for namespace walks of depth 1, as in the acpi_get_child() case, the "post-order" callbacks ordering is actually the same as the ordering of "pre-order" ones. Since that commit changed the namespace walk in acpi_get_child() to terminate after finding the first matching object instead of going through all of them and returning the last one, it effectively changed the result returned by that function in some rare cases and that led to problems (the switch from a "pre-order" to a "post-order" callback was supposed to prevent that from happening, but it was ineffective). As it turns out, the systems where the change made by commit 33f767d actually matters are those where there are multiple ACPI device objects representing the same PCIe port (which effectively is a bridge). Moreover, only one of them, and the one we are expected to use, has child device objects in the ACPI namespace, so the regression can be addressed as described above. References: https://bugzilla.kernel.org/show_bug.cgi?id=60561 Reported-by: Peter Wu Tested-by: Vladimir Lalov Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas Cc: 3.9+ # 3.9+ --- include/acpi/acpi_bus.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 5026aaa35133..94383a70c1a3 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -441,7 +441,11 @@ struct acpi_pci_root { }; /* helper */ -acpi_handle acpi_get_child(acpi_handle, u64); +acpi_handle acpi_find_child(acpi_handle, u64, bool); +static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr) +{ + return acpi_find_child(handle, addr, false); +} int acpi_is_root_bridge(acpi_handle); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev)) -- cgit v1.2.3 From 786615bc1ce84150ded80daea6bd9f6297f48e73 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Aug 2013 16:04:47 -0400 Subject: SUNRPC: If the rpcbind channel is disconnected, fail the call to unregister If rpcbind causes our connection to the AF_LOCAL socket to close after we've registered a service, then we want to be careful about reconnecting since the mount namespace may have changed. By simply refusing to reconnect the AF_LOCAL socket in the case of unregister, we avoid the need to somehow save the mount namespace. While this may lead to some services not unregistering properly, it should be safe. Signed-off-by: Trond Myklebust Cc: Nix Cc: Jeff Layton Cc: stable@vger.kernel.org # 3.9.x --- include/linux/sunrpc/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d870353674a..1821445708d6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -121,6 +121,7 @@ struct rpc_task_setup { #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) -- cgit v1.2.3 From 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Aug 2013 18:55:32 +0200 Subject: userns: limit the maximum depth of user_namespace->parent chain Ensure that user_namespace->parent chain can't grow too much. Currently we use the hardroded 32 as limit. Reported-by: Andy Lutomirski Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b6b215f13b45..14105c26a836 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -23,6 +23,7 @@ struct user_namespace { struct uid_gid_map projid_map; atomic_t count; struct user_namespace *parent; + int level; kuid_t owner; kgid_t group; unsigned int proc_inum; -- cgit v1.2.3 From 288a9376371d425edeeea41a0310922c5fb2092d Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 7 Aug 2013 11:33:25 +0300 Subject: net: rename busy poll MIB counter Rename mib counter from "low latency" to "busy poll" v1 also moved the counter to the ip MIB (suggested by Shawn Bohrer) Eric Dumazet suggested that the current location is better. So v2 just renames the counter to fit the new naming convention. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/net/busy_poll.h | 2 +- include/uapi/linux/snmp.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8e2dfc106aed..8a358a2c97e6 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -122,7 +122,7 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock) if (rc > 0) /* local bh are disabled so it is ok to use _BH */ NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_LOWLATENCYRXPACKETS, rc); + LINUX_MIB_BUSYPOLLRXPACKETS, rc); } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && !need_resched() && !busy_loop_timeout(end_time)); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index af0a674cc677..a1356d3b54df 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -253,7 +253,7 @@ enum LINUX_MIB_TCPFASTOPENLISTENOVERFLOW, /* TCPFastOpenListenOverflow */ LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ - LINUX_MIB_LOWLATENCYRXPACKETS, /* LowLatencyRxPackets */ + LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ __LINUX_MIB_MAX }; -- cgit v1.2.3 From e0acd0a68ec7dbf6b7a81a87a867ebd7ac9b76c4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 Aug 2013 18:14:00 +0200 Subject: sched: fix the theoretical signal_wake_up() vs schedule() race This is only theoretical, but after try_to_wake_up(p) was changed to check p->state under p->pi_lock the code like __set_current_state(TASK_INTERRUPTIBLE); schedule(); can miss a signal. This is the special case of wait-for-condition, it relies on try_to_wake_up/schedule interaction and thus it does not need mb() between __set_current_state() and if(signal_pending). However, this __set_current_state() can move into the critical section protected by rq->lock, now that try_to_wake_up() takes another lock we need to ensure that it can't be reordered with "if (signal_pending(current))" check inside that section. The patch is actually one-liner, it simply adds smp_wmb() before spin_lock_irq(rq->lock). This is what try_to_wake_up() already does by the same reason. We turn this wmb() into the new helper, smp_mb__before_spinlock(), for better documentation and to allow the architectures to change the default implementation. While at it, kill smp_mb__after_lock(), it has no callers. Perhaps we can also add smp_mb__before/after_spinunlock() for prepare_to_wait(). Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- include/linux/spinlock.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 7d537ced949a..75f34949d9ab 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -117,9 +117,17 @@ do { \ #endif /*arch_spin_is_contended*/ #endif -/* The lock does not imply full memory barrier. */ -#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK -static inline void smp_mb__after_lock(void) { smp_mb(); } +/* + * Despite its name it doesn't necessarily has to be a full barrier. + * It should only guarantee that a STORE before the critical section + * can not be reordered with a LOAD inside this section. + * spin_lock() is the one-way barrier, this LOAD can not escape out + * of the region. So the default implementation simply ensures that + * a STORE can not move into the critical section, smp_wmb() should + * serialize it with another STORE done by spin_lock(). + */ +#ifndef smp_mb__before_spinlock +#define smp_mb__before_spinlock() smp_wmb() #endif /** -- cgit v1.2.3 From 4221f40513233fa8edeef7fc82e44163fde03b9b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 13 Aug 2013 01:41:06 -0700 Subject: ip_tunnel: Do not use inner ip-header-id for tunnel ip-header-id. Using inner-id for tunnel id is not safe in some rare cases. E.g. packets coming from multiple sources entering same tunnel can have same id. Therefore on tunnel packet receive we could have packets from two different stream but with same source and dst IP with same ip-id which could confuse ip packet reassembly. Following patch reverts optimization from commit 490ab08127 (IP_GRE: Fix IP-Identification.) CC: Jarno Rajahalme CC: Ansis Atteka Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 781b3cf86a2f..a354db5b7662 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -145,20 +145,6 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -static inline void tunnel_ip_select_ident(struct sk_buff *skb, - const struct iphdr *old_iph, - struct dst_entry *dst) -{ - struct iphdr *iph = ip_hdr(skb); - - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); -} - int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); int iptunnel_xmit(struct net *net, struct rtable *rt, struct sk_buff *skb, -- cgit v1.2.3 From 179ef71cbc085252e3fe6b8159263a7ed1d88ea4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 13 Aug 2013 16:00:49 -0700 Subject: mm: save soft-dirty bits on swapped pages Andy Lutomirski reported that if a page with _PAGE_SOFT_DIRTY bit set get swapped out, the bit is getting lost and no longer available when pte read back. To resolve this we introduce _PTE_SWP_SOFT_DIRTY bit which is saved in pte entry for the page being swapped out. When such page is to be read back from a swap cache we check for bit presence and if it's there we clear it and restore the former _PAGE_SOFT_DIRTY bit back. One of the problem was to find a place in pte entry where we can save the _PTE_SWP_SOFT_DIRTY bit while page is in swap. The _PAGE_PSE was chosen for that, it doesn't intersect with swap entry format stored in pte. Reported-by: Andy Lutomirski Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Reviewed-by: Minchan Kim Reviewed-by: Wanpeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 15 +++++++++++++++ include/linux/swapops.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2f47ade1b567..2a7e0d10ad9a 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -417,6 +417,21 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) { return pmd; } + +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return pte; +} + +static inline int pte_swp_soft_dirty(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return pte; +} #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5fd30d2a415..8d4fa82bfb91 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) swp_entry_t arch_entry; BUG_ON(pte_file(pte)); + if (pte_swp_soft_dirty(pte)) + pte = pte_swp_clear_soft_dirty(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } -- cgit v1.2.3 From 41bb3476b361ef38576cf9d539b19bae2ac93167 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 13 Aug 2013 16:00:51 -0700 Subject: mm: save soft-dirty bits on file pages Andy reported that if file page get reclaimed we lose the soft-dirty bit if it was there, so save _PAGE_BIT_SOFT_DIRTY bit when page address get encoded into pte entry. Thus when #pf happens on such non-present pte we can restore it back. Reported-by: Andy Lutomirski Signed-off-by: Cyrill Gorcunov Acked-by: Pavel Emelyanov Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Cc: Minchan Kim Cc: Wanpeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 2a7e0d10ad9a..0807ddf97b05 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -432,6 +432,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } + +static inline pte_t pte_file_clear_soft_dirty(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_file_mksoft_dirty(pte_t pte) +{ + return pte; +} + +static inline int pte_file_soft_dirty(pte_t pte) +{ + return 0; +} #endif #ifndef __HAVE_PFNMAP_TRACKING -- cgit v1.2.3 From dfa9771a7c4784bafd0673bc7abcee3813088b77 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 13 Aug 2013 16:00:53 -0700 Subject: microblaze: fix clone syscall Fix inadvertent breakage in the clone syscall ABI for Microblaze that was introduced in commit f3268edbe6fe ("microblaze: switch to generic fork/vfork/clone"). The Microblaze syscall ABI for clone takes the parent tid address in the 4th argument; the third argument slot is used for the stack size. The incorrectly-used CLONE_BACKWARDS type assigned parent tid to the 3rd slot. This commit restores the original ABI so that existing userspace libc code will work correctly. All kernel versions from v3.8-rc1 were affected. Signed-off-by: Michal Simek Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4147d700a293..84662ecc7b51 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void); asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, int __user *); #else +#ifdef CONFIG_CLONE_BACKWARDS3 +asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, + int __user *, int); +#else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int __user *, int); #endif +#endif asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, -- cgit v1.2.3 From df54d6fa54275ce59660453e29d1228c2b45a826 Mon Sep 17 00:00:00 2001 From: Radu Caragea Date: Tue, 13 Aug 2013 16:00:59 -0700 Subject: x86 get_unmapped_area(): use proper mmap base for bottom-up direction When the stack is set to unlimited, the bottomup direction is used for mmap-ings but the mmap_base is not used and thus effectively renders ASLR for mmapings along with PIE useless. Cc: Michel Lespinasse Cc: Oleg Nesterov Reviewed-by: Rik van Riel Acked-by: Ingo Molnar Cc: Adrian Sendroiu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d722490da030..923dd6ea4a0e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -314,6 +314,7 @@ struct nsproxy; struct user_namespace; #ifdef CONFIG_MMU +extern unsigned long mmap_legacy_base(void); extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, -- cgit v1.2.3 From 8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Aug 2013 23:47:11 +0200 Subject: net_sched: restore "linklayer atm" handling commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/sch_generic.h | 9 ++++++++- include/uapi/linux/pkt_sched.h | 10 +++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6eab63363e59..e5ae0c50fa9c 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -683,13 +683,19 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u8 linklayer; u8 shift; }; static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, unsigned int len) { - return ((u64)(len + r->overhead) * r->mult) >> r->shift; + len += r->overhead; + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; + + return ((u64)len * r->mult) >> r->shift; } extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); @@ -700,6 +706,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, memset(res, 0, sizeof(*res)); res->rate = r->rate_bytes_ps; res->overhead = r->overhead; + res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index dbd71b0c7d8c..09d62b9228ff 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -73,9 +73,17 @@ struct tc_estimator { #define TC_H_ROOT (0xFFFFFFFFU) #define TC_H_INGRESS (0xFFFFFFF1U) +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + struct tc_ratespec { unsigned char cell_log; - unsigned char __reserved; + __u8 linklayer; /* lower 4 bits */ unsigned short overhead; short cell_align; unsigned short mpu; -- cgit v1.2.3 From 0a324f3189ed9c78b1aaf48d88e93cb18643c655 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Wed, 14 Aug 2013 17:46:48 +0300 Subject: net/mlx5_core: Support MANAGE_PAGES and QUERY_PAGES firmware command changes In the previous QUERY_PAGES command version we used one command to get the required amount of boot, init and post init pages. The new version uses the op_mod field to specify whether the query is for the required amount of boot, init or post init pages. In addition the output field size for the required amount of pages increased from 16 to 32 bits. In MANAGE_PAGES command the input_num_entries and output_num_entries fields sizes changed from 16 to 32 bits and the PAS tables offset changed to 0x10. In the pages request event the num_pages field also changed to 32 bits. In the HCA-capabilities-layout the size and location of max_qp_mcg field has been changed to support 24 bits. This patch isn't compatible with firmware versions < 5; however, it turns out that the first GA firmware we will publish will not support previous versions so this should be OK. Signed-off-by: Moshe Lazer Signed-off-by: Eli Cohen Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 22 ++++++++++------------ include/linux/mlx5/driver.h | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 737685e9e852..68029b30c3dc 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -309,21 +309,20 @@ struct mlx5_hca_cap { __be16 max_desc_sz_rq; u8 rsvd21[2]; __be16 max_desc_sz_sq_dc; - u8 rsvd22[4]; - __be16 max_qp_mcg; - u8 rsvd23; + __be32 max_qp_mcg; + u8 rsvd22[3]; u8 log_max_mcg; - u8 rsvd24; + u8 rsvd23; u8 log_max_pd; - u8 rsvd25; + u8 rsvd24; u8 log_max_xrcd; - u8 rsvd26[42]; + u8 rsvd25[42]; __be16 log_uar_page_sz; - u8 rsvd27[28]; + u8 rsvd26[28]; u8 log_msx_atomic_size_qp; - u8 rsvd28[2]; + u8 rsvd27[2]; u8 log_msx_atomic_size_dc; - u8 rsvd29[76]; + u8 rsvd28[76]; }; @@ -472,9 +471,8 @@ struct mlx5_eqe_cmd { struct mlx5_eqe_page_req { u8 rsvd0[2]; __be16 func_id; - u8 rsvd1[2]; - __be16 num_pages; - __be32 rsvd2[5]; + __be32 num_pages; + __be32 rsvd1[5]; }; union ev_data { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 611e65e76b00..8888381fc150 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -358,7 +358,7 @@ struct mlx5_caps { u32 reserved_lkey; u8 local_ca_ack_delay; u8 log_max_mcg; - u16 max_qp_mcg; + u32 max_qp_mcg; int min_page_sz; }; @@ -691,7 +691,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s16 npages); + s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); void mlx5_register_debugfs(void); -- cgit v1.2.3 From 2b047252d087be7f2ba088b4933cd904f92e6fce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Aug 2013 11:42:25 -0700 Subject: Fix TLB gather virtual address range invalidation corner cases Ben Tebulin reported: "Since v3.7.2 on two independent machines a very specific Git repository fails in 9/10 cases on git-fsck due to an SHA1/memory failures. This only occurs on a very specific repository and can be reproduced stably on two independent laptops. Git mailing list ran out of ideas and for me this looks like some very exotic kernel issue" and bisected the failure to the backport of commit 53a59fc67f97 ("mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT"). That commit itself is not actually buggy, but what it does is to make it much more likely to hit the partial TLB invalidation case, since it introduces a new case in tlb_next_batch() that previously only ever happened when running out of memory. The real bug is that the TLB gather virtual memory range setup is subtly buggered. It was introduced in commit 597e1c3580b7 ("mm/mmu_gather: enable tlb flush range in generic mmu_gather"), and the range handling was already fixed at least once in commit e6c495a96ce0 ("mm: fix the TLB range flushed when __tlb_remove_page() runs out of slots"), but that fix was not complete. The problem with the TLB gather virtual address range is that it isn't set up by the initial tlb_gather_mmu() initialization (which didn't get the TLB range information), but it is set up ad-hoc later by the functions that actually flush the TLB. And so any such case that forgot to update the TLB range entries would potentially miss TLB invalidates. Rather than try to figure out exactly which particular ad-hoc range setup was missing (I personally suspect it's the hugetlb case in zap_huge_pmd(), which didn't have the same logic as zap_pte_range() did), this patch just gets rid of the problem at the source: make the TLB range information available to tlb_gather_mmu(), and initialize it when initializing all the other tlb gather fields. This makes the patch larger, but conceptually much simpler. And the end result is much more understandable; even if you want to play games with partial ranges when invalidating the TLB contents in chunks, now the range information is always there, and anybody who doesn't want to bother with it won't introduce subtle bugs. Ben verified that this fixes his problem. Reported-bisected-and-tested-by: Ben Tebulin Build-testing-by: Stephen Rothwell Build-testing-by: Richard Weinberger Reviewed-by: Michal Hocko Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/asm-generic/tlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 13821c339a41..5672d7ea1fa0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,7 +112,7 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm); +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -- cgit v1.2.3