From f872f5400cc01373d8e29d9c7a5296ccfaf4ccf3 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Tue, 29 Dec 2015 20:12:19 -0800
Subject: mm: Add a vm_special_mapping.fault() method

Requiring special mappings to give a list of struct pages is
inflexible: it prevents sane use of IO memory in a special
mapping, it's inefficient (it requires arch code to initialize a
list of struct pages, and it requires the mm core to walk the
entire list just to figure out how long it is), and it prevents
arch code from doing anything fancy when a special mapping fault
occurs.

Add a .fault method as an alternative to filling in a .pages
array.

Looks-OK-to: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/a26d1677c0bc7e774c33f469451a78ca31e9e6af.1451446564.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm_types.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f8d1492a114f..c88e48a3c155 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -568,10 +568,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
 }
 #endif
 
-struct vm_special_mapping
-{
-	const char *name;
+struct vm_fault;
+
+struct vm_special_mapping {
+	const char *name;	/* The name, e.g. "[vdso]". */
+
+	/*
+	 * If .fault is not provided, this points to a
+	 * NULL-terminated array of pages that back the special mapping.
+	 *
+	 * This must not be NULL unless .fault is provided.
+	 */
 	struct page **pages;
+
+	/*
+	 * If non-NULL, then this is called to resolve page faults
+	 * on the special mapping.  If used, .pages is not checked.
+	 */
+	int (*fault)(const struct vm_special_mapping *sm,
+		     struct vm_area_struct *vma,
+		     struct vm_fault *vmf);
 };
 
 enum tlb_flush_reason {
-- 
cgit v1.2.3


From 1745cbc5d0dee0749a6bc0ea8e872c5db0074061 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 29 Dec 2015 20:12:20 -0800
Subject: mm: Add vm_insert_pfn_prot()

The x86 vvar vma contains pages with differing cacheability
flags.  x86 currently implements this by manually inserting all
the ptes using (io_)remap_pfn_range when the vma is set up.

x86 wants to move to using .fault with VM_FAULT_NOPAGE to set up
the mappings as needed.  The correct API to use to insert a pfn
in .fault is vm_insert_pfn(), but vm_insert_pfn() can't override the
vma's cache mode, and the HPET page in particular needs to be
uncached despite the fact that the rest of the VMA is cached.

Add vm_insert_pfn_prot() to support varying cacheability within
the same non-COW VMA in a more sane manner.

x86 could alternatively use multiple VMAs, but that's messy,
would break CRIU, and would create unnecessary VMAs that would
waste memory.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/d2938d1eb37be7a5e4f86182db646551f11e45aa.1451446564.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..87ef1d7730ba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2080,6 +2080,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
-- 
cgit v1.2.3


From dd42ac8f02aea32661756554aace2095f7181d34 Mon Sep 17 00:00:00 2001
From: Alexander Kuleshov <kuleshovmail@gmail.com>
Date: Fri, 16 Oct 2015 15:20:53 +0600
Subject: clockevents: Rename last parameter of clocks_calc_mult_shift() to
 maxsec

Last parameter of the clocks_calc_mult_shift() was renamed from minsec to
maxsec in the 5fdade95 (time: Rename misnamed minsec argument of
clocks_calc_mult_shift()).

Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Link: http://lkml.kernel.org/r/1444987253-11018-1-git-send-email-kuleshovmail@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index bdcf358dfce2..0d442e34c349 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -190,9 +190,9 @@ extern void clockevents_config_and_register(struct clock_event_device *dev,
 extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
 
 static inline void
-clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
+clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 maxsec)
 {
-	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec);
+	return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, maxsec);
 }
 
 extern void clockevents_suspend(void);
-- 
cgit v1.2.3


From 9c808765e88efb6fa6af7e2206ef89512f1840a7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 15 Jan 2016 17:41:08 +0000
Subject: hrtimer: Add support for CLOCK_MONOTONIC_RAW

The KVM/ARM timer implementation arms a hrtimer when a vcpu is
blocked (usually because it is waiting for an interrupt)
while its timer is going to kick in the future.

It is essential that this timer doesn't get adjusted, or the
guest will end up being woken-up at the wrong time (NTP running
on the host seems to confuse the hell out of some guests).

In order to allow this, let's add CLOCK_MONOTONIC_RAW support
to hrtimer (it is so far only supported for posix timers). It also
has the (limited) benefit of fixing de0421d53bfb ("mac80211_hwsim:
shuffle code to prepare for dynamic radios"), which already uses
this functionnality without realizing wasn't implemented (just being
lucky...).

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Tomasz Nowicki <tn@semihalf.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Link: http://lkml.kernel.org/r/1452879670-16133-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 76dd4f0da5ca..a6d64af5e73f 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -151,6 +151,7 @@ enum  hrtimer_base_type {
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
 	HRTIMER_BASE_TAI,
+	HRTIMER_BASE_MONOTONIC_RAW,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
-- 
cgit v1.2.3


From cb150b9d23be6ee7f3a0fff29784f1c5b5ac514d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 27 Jan 2016 13:29:34 +0100
Subject: cfg80211/wext: fix message ordering

Since cfg80211 frequently takes actions from its netdev notifier
call, wireless extensions messages could still be ordered badly
since the wext netdev notifier, since wext is built into the
kernel, runs before the cfg80211 netdev notifier. For example,
the following can happen:

5: wlan1: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default
    link/ether 02:00:00:00:01:00 brd ff:ff:ff:ff:ff:ff
5: wlan1: <BROADCAST,MULTICAST,UP>
    link/ether

when setting the interface down causes the wext message.

To also fix this, export the wireless_nlevent_flush() function
and also call it from the cfg80211 notifier.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/iw_handler.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8f81bbbc38fc..e0f4109e64c6 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -439,6 +439,12 @@ int dev_get_wireless_info(char *buffer, char **start, off_t offset, int length);
 /* Send a single event to user space */
 void wireless_send_event(struct net_device *dev, unsigned int cmd,
 			 union iwreq_data *wrqu, const char *extra);
+#ifdef CONFIG_WEXT_CORE
+/* flush all previous wext events - if work is done from netdev notifiers */
+void wireless_nlevent_flush(void);
+#else
+static inline void wireless_nlevent_flush(void) {}
+#endif
 
 /* We may need a function to send a stream of events to user space.
  * More on that later... */
-- 
cgit v1.2.3


From 9babd5c8caa6e62c116efc3a64a09f65af4112b0 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 26 Jan 2016 21:57:17 +0100
Subject: resource: Add System RAM resource type

The IORESOURCE_MEM I/O resource type is used for all types of
memory-mapped ranges, ex. System RAM, System ROM, Video RAM,
Persistent Memory, PCI Bus, PCI MMCONFIG, ACPI Tables, IOAPIC,
reserved, and so on.

This requires walk_system_ram_range(), walk_system_ram_res(),
and region_intersects() to use strcmp() against string "System
RAM" to search for System RAM ranges in the iomem table, which
is inefficient. __ioremap_caller() and reserve_memtype() on x86,
for instance, call walk_system_ram_range() for every request to
check if a given range is in System RAM ranges.

However, adding a new I/O resource type for System RAM is not a
viable option, see [1]. There are approx. 3800 references to
IORESOURCE_MEM in the kernel/drivers, which makes it very
difficult to distinguish their usages between new type and
IORESOURCE_MEM.

The I/O resource types are also used by the PNP subsystem.
Therefore, introduce an extended I/O resource type,
IORESOURCE_SYSTEM_RAM, which consists of IORESOURCE_MEM and a
new modifier flag IORESOURCE_SYSRAM, see [2].

To keep the code 'if (resource_type(r) == IORESOURCE_MEM)' still
working for System RAM, resource_ext_type() is added for
extracting extended type bits.

Link[1]: http://lkml.kernel.org/r/1449168859.9855.54.camel@hpe.com
Link[2]: http://lkml.kernel.org/r/CA+55aFy4WQrWexC4u2LxX9Mw2NVoznw7p3Yh=iF4Xtf7zKWnRw@mail.gmail.com

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Jakub Sitnicki <jsitnicki@gmail.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>
Link: http://lkml.kernel.org/r/1453841853-11383-2-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ioport.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 24bea087e7af..4b65d944717f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -49,12 +49,19 @@ struct resource {
 #define IORESOURCE_WINDOW	0x00200000	/* forwarded by bridge */
 #define IORESOURCE_MUXED	0x00400000	/* Resource is software muxed */
 
+#define IORESOURCE_EXT_TYPE_BITS 0x01000000	/* Resource extended types */
+#define IORESOURCE_SYSRAM	0x01000000	/* System RAM (modifier) */
+
 #define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
+
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000	/* No address assigned yet */
 #define IORESOURCE_AUTO		0x40000000
 #define IORESOURCE_BUSY		0x80000000	/* Driver has marked this resource busy */
 
+/* I/O resource extended types */
+#define IORESOURCE_SYSTEM_RAM		(IORESOURCE_MEM|IORESOURCE_SYSRAM)
+
 /* PnP IRQ specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IRQ_HIGHEDGE		(1<<0)
 #define IORESOURCE_IRQ_LOWEDGE		(1<<1)
@@ -170,6 +177,10 @@ static inline unsigned long resource_type(const struct resource *res)
 {
 	return res->flags & IORESOURCE_TYPE_BITS;
 }
+static inline unsigned long resource_ext_type(const struct resource *res)
+{
+	return res->flags & IORESOURCE_EXT_TYPE_BITS;
+}
 /* True iff r1 completely contains r2 */
 static inline bool resource_contains(struct resource *r1, struct resource *r2)
 {
-- 
cgit v1.2.3


From 43ee493bde78da00deaf5737925365c691a036ad Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 26 Jan 2016 21:57:19 +0100
Subject: resource: Add I/O resource descriptor

walk_iomem_res() and region_intersects() still need to use
strcmp() for searching a resource entry by @name in the iomem
table.

This patch introduces I/O resource descriptor 'desc' in struct
resource for the iomem search interfaces. Drivers can assign
their unique descriptor to a range when they support the search
interfaces.

Otherwise, 'desc' is set to IORES_DESC_NONE (0). This avoids
changing most of the drivers as they typically allocate resource
entries statically, or by calling alloc_resource(), kzalloc(),
or alloc_bootmem_low(), which set the field to zero by default.
A later patch will address some drivers that use kmalloc()
without zero'ing the field.

Also change release_mem_region_adjustable() to set 'desc' when
its resource entry gets separated. Other resource interfaces are
also changed to initialize 'desc' explicitly although
alloc_resource() sets it to 0.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jakub Sitnicki <jsitnicki@gmail.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>
Link: http://lkml.kernel.org/r/1453841853-11383-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ioport.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 4b65d944717f..983bea05d69c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -20,6 +20,7 @@ struct resource {
 	resource_size_t end;
 	const char *name;
 	unsigned long flags;
+	unsigned long desc;
 	struct resource *parent, *sibling, *child;
 };
 
@@ -112,6 +113,22 @@ struct resource {
 /* PCI control bits.  Shares IORESOURCE_BITS with above PCI ROM.  */
 #define IORESOURCE_PCI_FIXED		(1<<4)	/* Do not move resource */
 
+/*
+ * I/O Resource Descriptors
+ *
+ * Descriptors are used by walk_iomem_res_desc() and region_intersects()
+ * for searching a specific resource range in the iomem table.  Assign
+ * a new descriptor when a resource range supports the search interfaces.
+ * Otherwise, resource.desc must be set to IORES_DESC_NONE (0).
+ */
+enum {
+	IORES_DESC_NONE				= 0,
+	IORES_DESC_CRASH_KERNEL			= 1,
+	IORES_DESC_ACPI_TABLES			= 2,
+	IORES_DESC_ACPI_NV_STORAGE		= 3,
+	IORES_DESC_PERSISTENT_MEMORY		= 4,
+	IORES_DESC_PERSISTENT_MEMORY_LEGACY	= 5,
+};
 
 /* helpers to define resources */
 #define DEFINE_RES_NAMED(_start, _size, _name, _flags)			\
@@ -120,6 +137,7 @@ struct resource {
 		.end = (_start) + (_size) - 1,				\
 		.name = (_name),					\
 		.flags = (_flags),					\
+		.desc = IORES_DESC_NONE,				\
 	}
 
 #define DEFINE_RES_IO_NAMED(_start, _size, _name)			\
-- 
cgit v1.2.3


From 1c29f25bf5d6c557017f619b638c619cbbf798c4 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 26 Jan 2016 21:57:28 +0100
Subject: memremap: Change region_intersects() to take @flags and @desc

Change region_intersects() to identify a target with @flags and
@desc, instead of @name with strcmp().

Change the callers of region_intersects(), memremap() and
devm_memremap(), to set IORESOURCE_SYSTEM_RAM in @flags and
IORES_DESC_NONE in @desc when searching System RAM.

Also, export region_intersects() so that the ACPI EINJ error
injection driver can call this function in a later patch.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jakub Sitnicki <jsitnicki@gmail.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>
Link: http://lkml.kernel.org/r/1453841853-11383-13-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1cd22f2df1a..cd5a300d3397 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -385,7 +385,8 @@ enum {
 	REGION_MIXED,
 };
 
-int region_intersects(resource_size_t offset, size_t size, const char *type);
+int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
+		      unsigned long desc);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
-- 
cgit v1.2.3


From 3f33647c41962401272bb60dce67e6094d14dbf2 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 26 Jan 2016 21:57:29 +0100
Subject: resource: Add walk_iomem_res_desc()

Add a new interface, walk_iomem_res_desc(), which walks through
the iomem table by identifying a target with @flags and @desc.
This interface provides the same functionality as
walk_iomem_res(), but does not use strcmp() to @name for better
efficiency.

walk_iomem_res() is deprecated and will be removed in a later
patch.

Requested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
[ Fixup comments. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Jakub Sitnicki <jsitnicki@gmail.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>
Link: http://lkml.kernel.org/r/1453841853-11383-14-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ioport.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 983bea05d69c..2a4a5e839965 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -268,6 +268,9 @@ extern int
 walk_system_ram_res(u64 start, u64 end, void *arg,
 		    int (*func)(u64, u64, void *));
 extern int
+walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
+		    void *arg, int (*func)(u64, u64, void *));
+extern int
 walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
 	       int (*func)(u64, u64, void *));
 
-- 
cgit v1.2.3


From a8fc42530ddd19d7580fe8c9f2ea86220a97e94c Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Tue, 26 Jan 2016 21:57:32 +0100
Subject: resource: Kill walk_iomem_res()

walk_iomem_res_desc() replaced walk_iomem_res() and there is no
caller to walk_iomem_res() any more. Kill it. Also remove @name
from find_next_iomem_res() as it is no longer used.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Jakub Sitnicki <jsitnicki@gmail.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm <linux-mm@kvack.org>
Link: http://lkml.kernel.org/r/1453841853-11383-17-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ioport.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 2a4a5e839965..afb45597fb5f 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -270,9 +270,6 @@ walk_system_ram_res(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(u64, u64, void *));
-extern int
-walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
-	       int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
-- 
cgit v1.2.3


From cb2517653fccaf9f9b4ae968c7ee005c1bbacdc5 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 5 Feb 2016 09:08:36 +0000
Subject: sched/debug: Make schedstats a runtime tunable that is disabled by
 default

schedstats is very useful during debugging and performance tuning but it
incurs overhead to calculate the stats. As such, even though it can be
disabled at build time, it is often enabled as the information is useful.

This patch adds a kernel command-line and sysctl tunable to enable or
disable schedstats on demand (when it's built in). It is disabled
by default as someone who knows they need it can also learn to enable
it when necessary.

The benefits are dependent on how scheduler-intensive the workload is.
If it is then the patch reduces the number of cycles spent calculating
the stats with a small benefit from reducing the cache footprint of the
scheduler.

These measurements were taken from a 48-core 2-socket
machine with Xeon(R) E5-2670 v3 cpus although they were also tested on a
single socket machine 8-core machine with Intel i7-3770 processors.

netperf-tcp
                           4.5.0-rc1             4.5.0-rc1
                             vanilla          nostats-v3r1
Hmean    64         560.45 (  0.00%)      575.98 (  2.77%)
Hmean    128        766.66 (  0.00%)      795.79 (  3.80%)
Hmean    256        950.51 (  0.00%)      981.50 (  3.26%)
Hmean    1024      1433.25 (  0.00%)     1466.51 (  2.32%)
Hmean    2048      2810.54 (  0.00%)     2879.75 (  2.46%)
Hmean    3312      4618.18 (  0.00%)     4682.09 (  1.38%)
Hmean    4096      5306.42 (  0.00%)     5346.39 (  0.75%)
Hmean    8192     10581.44 (  0.00%)    10698.15 (  1.10%)
Hmean    16384    18857.70 (  0.00%)    18937.61 (  0.42%)

Small gains here, UDP_STREAM showed nothing intresting and neither did
the TCP_RR tests. The gains on the 8-core machine were very similar.

tbench4
                                 4.5.0-rc1             4.5.0-rc1
                                   vanilla          nostats-v3r1
Hmean    mb/sec-1         500.85 (  0.00%)      522.43 (  4.31%)
Hmean    mb/sec-2         984.66 (  0.00%)     1018.19 (  3.41%)
Hmean    mb/sec-4        1827.91 (  0.00%)     1847.78 (  1.09%)
Hmean    mb/sec-8        3561.36 (  0.00%)     3611.28 (  1.40%)
Hmean    mb/sec-16       5824.52 (  0.00%)     5929.03 (  1.79%)
Hmean    mb/sec-32      10943.10 (  0.00%)    10802.83 ( -1.28%)
Hmean    mb/sec-64      15950.81 (  0.00%)    16211.31 (  1.63%)
Hmean    mb/sec-128     15302.17 (  0.00%)    15445.11 (  0.93%)
Hmean    mb/sec-256     14866.18 (  0.00%)    15088.73 (  1.50%)
Hmean    mb/sec-512     15223.31 (  0.00%)    15373.69 (  0.99%)
Hmean    mb/sec-1024    14574.25 (  0.00%)    14598.02 (  0.16%)
Hmean    mb/sec-2048    13569.02 (  0.00%)    13733.86 (  1.21%)
Hmean    mb/sec-3072    12865.98 (  0.00%)    13209.23 (  2.67%)

Small gains of 2-4% at low thread counts and otherwise flat.  The
gains on the 8-core machine were slightly different

tbench4 on 8-core i7-3770 single socket machine
Hmean    mb/sec-1        442.59 (  0.00%)      448.73 (  1.39%)
Hmean    mb/sec-2        796.68 (  0.00%)      794.39 ( -0.29%)
Hmean    mb/sec-4       1322.52 (  0.00%)     1343.66 (  1.60%)
Hmean    mb/sec-8       2611.65 (  0.00%)     2694.86 (  3.19%)
Hmean    mb/sec-16      2537.07 (  0.00%)     2609.34 (  2.85%)
Hmean    mb/sec-32      2506.02 (  0.00%)     2578.18 (  2.88%)
Hmean    mb/sec-64      2511.06 (  0.00%)     2569.16 (  2.31%)
Hmean    mb/sec-128     2313.38 (  0.00%)     2395.50 (  3.55%)
Hmean    mb/sec-256     2110.04 (  0.00%)     2177.45 (  3.19%)
Hmean    mb/sec-512     2072.51 (  0.00%)     2053.97 ( -0.89%)

In constract, this shows a relatively steady 2-3% gain at higher thread
counts. Due to the nature of the patch and the type of workload, it's
not a surprise that the result will depend on the CPU used.

hackbench-pipes
                         4.5.0-rc1             4.5.0-rc1
                           vanilla          nostats-v3r1
Amean    1        0.0637 (  0.00%)      0.0660 ( -3.59%)
Amean    4        0.1229 (  0.00%)      0.1181 (  3.84%)
Amean    7        0.1921 (  0.00%)      0.1911 (  0.52%)
Amean    12       0.3117 (  0.00%)      0.2923 (  6.23%)
Amean    21       0.4050 (  0.00%)      0.3899 (  3.74%)
Amean    30       0.4586 (  0.00%)      0.4433 (  3.33%)
Amean    48       0.5910 (  0.00%)      0.5694 (  3.65%)
Amean    79       0.8663 (  0.00%)      0.8626 (  0.43%)
Amean    110      1.1543 (  0.00%)      1.1517 (  0.22%)
Amean    141      1.4457 (  0.00%)      1.4290 (  1.16%)
Amean    172      1.7090 (  0.00%)      1.6924 (  0.97%)
Amean    192      1.9126 (  0.00%)      1.9089 (  0.19%)

Some small gains and losses and while the variance data is not included,
it's close to the noise. The UMA machine did not show anything particularly
different

pipetest
                             4.5.0-rc1             4.5.0-rc1
                               vanilla          nostats-v2r2
Min         Time        4.13 (  0.00%)        3.99 (  3.39%)
1st-qrtle   Time        4.38 (  0.00%)        4.27 (  2.51%)
2nd-qrtle   Time        4.46 (  0.00%)        4.39 (  1.57%)
3rd-qrtle   Time        4.56 (  0.00%)        4.51 (  1.10%)
Max-90%     Time        4.67 (  0.00%)        4.60 (  1.50%)
Max-93%     Time        4.71 (  0.00%)        4.65 (  1.27%)
Max-95%     Time        4.74 (  0.00%)        4.71 (  0.63%)
Max-99%     Time        4.88 (  0.00%)        4.79 (  1.84%)
Max         Time        4.93 (  0.00%)        4.83 (  2.03%)
Mean        Time        4.48 (  0.00%)        4.39 (  1.91%)
Best99%Mean Time        4.47 (  0.00%)        4.39 (  1.91%)
Best95%Mean Time        4.46 (  0.00%)        4.38 (  1.93%)
Best90%Mean Time        4.45 (  0.00%)        4.36 (  1.98%)
Best50%Mean Time        4.36 (  0.00%)        4.25 (  2.49%)
Best10%Mean Time        4.23 (  0.00%)        4.10 (  3.13%)
Best5%Mean  Time        4.19 (  0.00%)        4.06 (  3.20%)
Best1%Mean  Time        4.13 (  0.00%)        4.00 (  3.39%)

Small improvement and similar gains were seen on the UMA machine.

The gain is small but it stands to reason that doing less work in the
scheduler is a good thing. The downside is that the lack of schedstats and
tracepoints may be surprising to experts doing performance analysis until
they find the existence of the schedstats= parameter or schedstats sysctl.
It will be automatically activated for latencytop and sleep profiling to
alleviate the problem. For tracepoints, there is a simple warning as it's
not safe to activate schedstats in the context when it's known the tracepoint
may be wanted but is unavailable.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <mgalbraith@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1454663316-22048-1-git-send-email-mgorman@techsingularity.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/latencytop.h   | 3 +++
 include/linux/sched.h        | 4 ++++
 include/linux/sched/sysctl.h | 4 ++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index e23121f9d82a..59ccab297ae0 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -37,6 +37,9 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 
 void clear_all_latency_tracing(struct task_struct *p);
 
+extern int sysctl_latencytop(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #else
 
 static inline void
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a94cc3..a292c4b7e94c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,6 +920,10 @@ static inline int sched_info_on(void)
 #endif
 }
 
+#ifdef CONFIG_SCHEDSTATS
+void force_schedstat_enabled(void);
+#endif
+
 enum cpu_idle_type {
 	CPU_IDLE,
 	CPU_NOT_IDLE,
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index c9e4731cf10b..4f080ab4f2cd 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -95,4 +95,8 @@ extern int sysctl_numa_balancing(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
 
+extern int sysctl_schedstats(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
+
 #endif /* _SCHED_SYSCTL_H */
-- 
cgit v1.2.3


From a63f38cc4ccfa076f87fc3d0c276ee62e710f953 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 3 Feb 2016 13:44:12 -0800
Subject: locking/lockdep: Convert hash tables to hlists

Mike said:

: CONFIG_UBSAN_ALIGNMENT breaks x86-64 kernel with lockdep enabled, i.e.
: kernel with CONFIG_UBSAN_ALIGNMENT=y fails to load without even any error
: message.
:
: The problem is that ubsan callbacks use spinlocks and might be called
: before lockdep is initialized.  Particularly this line in the
: reserve_ebda_region function causes problem:
:
: lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
:
: If i put lockdep_init() before reserve_ebda_region call in
: x86_64_start_reservations kernel loads well.

Fix this ordering issue permanently: change lockdep so that it uses hlists
for the hash tables.  Unlike a list_head, an hlist_head is in its
initialized state when it is all-zeroes, so lockdep is ready for operation
immediately upon boot - lockdep_init() need not have run.

The patch will also save some memory.

Probably lockdep_init() and lockdep_initialized can be done away with now.

Suggested-by: Mike Krinkin <krinkin.m.u@gmail.com>
Reported-by: Mike Krinkin <krinkin.m.u@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: mm-commits@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c57e424d914b..4dca42fd32f5 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -66,7 +66,7 @@ struct lock_class {
 	/*
 	 * class-hash:
 	 */
-	struct list_head		hash_entry;
+	struct hlist_node		hash_entry;
 
 	/*
 	 * global list of all lock-classes:
@@ -199,7 +199,7 @@ struct lock_chain {
 	u8				irq_context;
 	u8				depth;
 	u16				base;
-	struct list_head		entry;
+	struct hlist_node		entry;
 	u64				chain_key;
 };
 
-- 
cgit v1.2.3


From 06bea3dbfe6a4c333c4333362c46bdf4d9e43504 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Thu, 4 Feb 2016 11:29:36 -0800
Subject: locking/lockdep: Eliminate lockdep_init()

Lockdep is initialized at compile time now.  Get rid of lockdep_init().

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Krinkin <krinkin.m.u@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: mm-commits@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 4dca42fd32f5..d026b190c530 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -261,7 +261,6 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_init(void);
 extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
@@ -392,7 +391,6 @@ static inline void lockdep_on(void)
 # define lockdep_set_current_reclaim_state(g)	do { } while (0)
 # define lockdep_clear_current_reclaim_state()	do { } while (0)
 # define lockdep_trace_alloc(g)			do { } while (0)
-# define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
-- 
cgit v1.2.3


From fed0764fafd8e2e629a033c0f7df4106b0dcb7f0 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 25 Jan 2016 16:33:20 -0500
Subject: locking/atomics: Update comment about READ_ONCE() and structures

The comment is out of data. Also point out the performance drawback
of the barrier();__builtin_memcpy(); barrier() followed by another
copy from stack (__u) to lvalue;

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1453757600-11441-1-git-send-email-konrad.wilk@oracle.com
[ Made it a bit more readable. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 00b042c49ccd..4291592b6433 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -263,8 +263,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  * data types like structs or unions. If the size of the accessed data
  * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
+ * least two memcpy()s: one for the __builtin_memcpy() and then one for
+ * the macro doing the copy of variable - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
-- 
cgit v1.2.3


From 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 Feb 2016 14:16:27 +0100
Subject: libata: fix HDIO_GET_32BIT ioctl

As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not
work correctly in compat mode with libata.

I have investigated the issue further and found multiple problems
that all appeared with the same commit that originally introduced
HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably
also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy
a 'long' variable containing either 0 or 1 to user space.

The problems with this are:

* On big-endian machines, this will always write a zero because it
  stores the wrong byte into user space.

* In compat mode, the upper three bytes of the variable are updated
  by the compat_hdio_ioctl() function, but they now contain
  uninitialized stack data.

* The hdparm tool calling this ioctl uses a 'static long' variable
  to store the result. This means at least the upper bytes are
  initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT
  would fill them with data that remains stale when the low byte
  is overwritten. Fortunately libata doesn't implement any of the
  affected ioctl commands, so this would only happen when we query
  both an IDE and an ATA device in the same command such as
  "hdparm -N -c /dev/hda /dev/sda"

* The libata code for unknown reasons started using ATA_IOC_GET_IO32
  and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT,
  while the ioctl commands that were added later use the normal
  HDIO_* names. This is harmless but rather confusing.

This addresses all four issues by changing the code to use put_user()
on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem
does, and by clarifying the names of the ioctl commands.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Soohoon Lee <Soohoon.Lee@f5.com>
Tested-by: Soohoon Lee <Soohoon.Lee@f5.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2992bfa1706..c1a2f345cbe6 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -487,8 +487,8 @@ enum ata_tf_protocols {
 };
 
 enum ata_ioctls {
-	ATA_IOC_GET_IO32	= 0x309,
-	ATA_IOC_SET_IO32	= 0x324,
+	ATA_IOC_GET_IO32	= 0x309, /* HDIO_GET_32BIT */
+	ATA_IOC_SET_IO32	= 0x324, /* HDIO_SET_32BIT */
 };
 
 /* core structures */
-- 
cgit v1.2.3


From 5fd7a09cfb8c6852f596c1f8c891c6158395250e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 11 Aug 2015 18:03:23 +0200
Subject: atomic: Export fetch_or()

Export fetch_or() that's implemented and used internally by the
scheduler. We are going to use it for NO_HZ so make it generally
available.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/atomic.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 301de78d65f7..6c502cb13c95 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+/**
+ * fetch_or - perform *ptr |= mask and return old value of *ptr
+ * @ptr: pointer to value
+ * @mask: mask to OR on the value
+ *
+ * cmpxchg based fetch_or, macro so it works for different integer types
+ */
+#ifndef fetch_or
+#define fetch_or(ptr, mask)						\
+({	typeof(*(ptr)) __old, __val = *(ptr);				\
+	for (;;) {							\
+		__old = cmpxchg((ptr), __val, __val | (mask));		\
+		if (__old == __val)					\
+			break;						\
+		__val = __old;						\
+	}								\
+	__old;								\
+})
+#endif
+
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
-- 
cgit v1.2.3


From 1f4522400e7e49464da5e584a463bd315283790f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 11 Feb 2016 12:28:55 -0200
Subject: [media] [for,v4.5] media.h: increase the spacing between function
 ranges

Each function range is quite narrow and especially for connectors this
will pose a problem. Increase the function ranges while we still can and
move the connector range to the end so that range is practically limitless.

[mchehab@osg.samsung.com: Rebased to apply at Linus tree]
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 1e3c8cb43bd7..8b87bddecf1c 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -72,21 +72,21 @@ struct media_device_info {
 #define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 4)
 
 /*
- * Connectors
+ * I/O entities
  */
-/* It is a responsibility of the entity drivers to add connectors and links */
-#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 21)
-#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 22)
-#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 23)
-/* For internal test signal generators and other debug connectors */
-#define MEDIA_ENT_F_CONN_TEST		(MEDIA_ENT_F_BASE + 24)
+#define MEDIA_ENT_F_IO_DTV		(MEDIA_ENT_F_BASE + 1001)
+#define MEDIA_ENT_F_IO_VBI		(MEDIA_ENT_F_BASE + 1002)
+#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 1003)
 
 /*
- * I/O entities
+ * Connectors
  */
-#define MEDIA_ENT_F_IO_DTV  		(MEDIA_ENT_F_BASE + 31)
-#define MEDIA_ENT_F_IO_VBI  		(MEDIA_ENT_F_BASE + 32)
-#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 33)
+/* It is a responsibility of the entity drivers to add connectors and links */
+#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 10001)
+#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 10002)
+#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 10003)
+/* For internal test signal generators and other debug connectors */
+#define MEDIA_ENT_F_CONN_TEST		(MEDIA_ENT_F_BASE + 10004)
 
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
-- 
cgit v1.2.3


From 9727a9545adec59f4bccb83d1a709711f4acf242 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Fri, 12 Feb 2016 15:44:31 -0200
Subject: [media] media.h: get rid of MEDIA_ENT_F_CONN_TEST

Defining it as a connector was a bad idea. Remove it while it is
not too late.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 8b87bddecf1c..f0328524be6e 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -85,8 +85,6 @@ struct media_device_info {
 #define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 10001)
 #define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 10002)
 #define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 10003)
-/* For internal test signal generators and other debug connectors */
-#define MEDIA_ENT_F_CONN_TEST		(MEDIA_ENT_F_BASE + 10004)
 
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
-- 
cgit v1.2.3


From e267d97b83d9cecc16c54825f9f3ac7f72dc1e1e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Feb 2016 14:41:12 -0800
Subject: asm-generic: Consolidate mark_rodata_ro()

Instead of defining mark_rodata_ro() in each architecture, consolidate it.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Gross <agross@codeaurora.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ashok Kumar <ashoks@broadcom.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Brown <david.brown@linaro.org>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Emese Revfy <re.emese@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathias Krause <minipli@googlemail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: PaX Team <pageexec@freemail.hu>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: kernel-hardening@lists.openwall.com
Cc: linux-arch <linux-arch@vger.kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Link: http://lkml.kernel.org/r/1455748879-21872-2-git-send-email-keescook@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/init.h b/include/linux/init.h
index b449f378f995..aedb254abc37 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -142,6 +142,10 @@ void prepare_namespace(void);
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
 extern void (*late_time_init)(void);
 
 extern bool initcall_debug;
-- 
cgit v1.2.3


From c74ba8b3480da6ddaea17df2263ec09b869ac496 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Feb 2016 14:41:15 -0800
Subject: arch: Introduce post-init read-only memory

One of the easiest ways to protect the kernel from attack is to reduce
the internal attack surface exposed when a "write" flaw is available. By
making as much of the kernel read-only as possible, we reduce the
attack surface.

Many things are written to only during __init, and never changed
again. These cannot be made "const" since the compiler will do the wrong
thing (we do actually need to write to them). Instead, move these items
into a memory region that will be made read-only during mark_rodata_ro()
which happens after all kernel __init code has finished.

This introduces __ro_after_init as a way to mark such memory, and adds
some documentation about the existing __read_mostly marking.

This improves the security of the Linux kernel by marking formerly
read-write memory regions as read-only on a fully booted up system.

Based on work by PaX Team and Brad Spengler.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brad Spengler <spender@grsecurity.net>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Brown <david.brown@linaro.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Emese Revfy <re.emese@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathias Krause <minipli@googlemail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: PaX Team <pageexec@freemail.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-hardening@lists.openwall.com
Cc: linux-arch <linux-arch@vger.kernel.org>
Link: http://lkml.kernel.org/r/1455748879-21872-5-git-send-email-keescook@chromium.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/vmlinux.lds.h |  1 +
 include/linux/cache.h             | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c4bd0e2c173c..772c784ba763 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -256,6 +256,7 @@
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
+		*(.data..ro_after_init)	/* Read only after init */	\
 		*(__vermagic)		/* Kernel version magic */	\
 		. = ALIGN(8);						\
 		VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;		\
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 17e7e82d2aa7..1be04f8c563a 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -12,10 +12,24 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/*
+ * __read_mostly is used to keep rarely changing variables out of frequently
+ * updated cachelines. If an architecture doesn't support it, ignore the
+ * hint.
+ */
 #ifndef __read_mostly
 #define __read_mostly
 #endif
 
+/*
+ * __ro_after_init is used to mark things that are read-only after init (i.e.
+ * after mark_rodata_ro() has been called). These are effectively read-only,
+ * but may get written to during init, so can't live in .rodata (via "const").
+ */
+#ifndef __ro_after_init
+#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
+#endif
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif
-- 
cgit v1.2.3


From ad315455d396a1cbcb2f9fdd687b7e1b26b789e7 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Tue, 29 Dec 2015 12:18:46 +0800
Subject: sparse: Add __private to privatize members of structs

In C programming language, we don't have a easy way to privatize a
member of a structure. However in kernel, sometimes there is a need to
privatize a member in case of potential bugs or misuses.

Fortunately, the noderef attribute of sparse is a way to privatize a
member, as by defining a member as noderef, the address-of operator on
the member will produce a noderef pointer to that member, and if anyone
wants to dereference that kind of pointers to read or modify the member,
sparse will yell.

Based on this, __private modifier and related operation ACCESS_PRIVATE()
are introduced, which could help detect undesigned public uses of
private members of structs. Here is an example of sparse's output if it
detect an undersigned public use:

| kernel/rcu/tree.c:4453:25: warning: incorrect type in argument 1 (different modifiers)
| kernel/rcu/tree.c:4453:25:    expected struct raw_spinlock [usertype] *lock
| kernel/rcu/tree.c:4453:25:    got struct raw_spinlock [noderef] *<noident>

Also, this patch improves compiler.h a little bit by adding comments for
"#else" and "#endif".

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/compiler.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 00b042c49ccd..c845356952bb 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -20,12 +20,14 @@
 # define __pmem		__attribute__((noderef, address_space(5)))
 #ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else
+#else /* CONFIG_SPARSE_RCU_POINTER */
 # define __rcu
-#endif
+#endif /* CONFIG_SPARSE_RCU_POINTER */
+# define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
-#else
+# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
+#else /* __CHECKER__ */
 # define __user
 # define __kernel
 # define __safe
@@ -44,7 +46,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __percpu
 # define __rcu
 # define __pmem
-#endif
+# define __private
+# define ACCESS_PRIVATE(p, member) ((p)->member)
+#endif /* __CHECKER__ */
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
 #define ___PASTE(a,b) a##b
-- 
cgit v1.2.3


From b354286effa52da6cb1b1f16604d41ff81b8c445 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Tue, 29 Dec 2015 12:18:48 +0800
Subject: irq: Privatize irq_common_data::state_use_accessors

irq_common_data::state_use_accessors is not designed for public use.
Therefore make it private so that people who write code accessing it
directly will get blamed by sparse. Also #undef the macro
__irqd_to_state after used in header files, so that the macro can't be
misused.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/irq.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c1c96786248..cd14cd4a22b4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -137,7 +137,7 @@ struct irq_domain;
  * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
-	unsigned int		state_use_accessors;
+	unsigned int		__private state_use_accessors;
 #ifdef CONFIG_NUMA
 	unsigned int		node;
 #endif
@@ -208,7 +208,7 @@ enum {
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 };
 
-#define __irqd_to_state(d)		((d)->common->state_use_accessors)
+#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
 
 static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
 {
@@ -299,6 +299,8 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d)
 	__irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU;
 }
 
+#undef __irqd_to_state
+
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 {
 	return d->hwirq;
-- 
cgit v1.2.3


From 9de630c4f264dec48e61edd871cb98b8e1b58250 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 8 Jan 2016 07:43:50 -0800
Subject: rcu: Document unique-name limitation for DEFINE_STATIC_SRCU()

SRCU uses per-CPU variables, and DEFINE_STATIC_SRCU() uses a static
per-CPU variable.  However, per-CPU variables have significant
restrictions, for example, names of per-CPU variables must be globally
unique, even if declared static.  These restrictions carry over to
DEFINE_STATIC_SRCU(), and this commit therefore documents these
restrictions.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
---
 include/linux/srcu.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index f5f80c5643ac..dc8eb63c6568 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -99,8 +99,23 @@ void process_srcu(struct work_struct *work);
 	}
 
 /*
- * define and init a srcu struct at build time.
- * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ * Define and initialize a srcu struct at build time.
+ * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ *
+ * Note that although DEFINE_STATIC_SRCU() hides the name from other
+ * files, the per-CPU variable rules nevertheless require that the
+ * chosen name be globally unique.  These rules also prohibit use of
+ * DEFINE_STATIC_SRCU() within a function.  If these rules are too
+ * restrictive, declare the srcu_struct manually.  For example, in
+ * each file:
+ *
+ *	static struct srcu_struct my_srcu;
+ *
+ * Then, before the first use of each my_srcu, manually initialize it:
+ *
+ *	init_srcu_struct(&my_srcu);
+ *
+ * See include/linux/percpu-defs.h for the rules on per-CPU variables.
  */
 #define __DEFINE_SRCU(name, is_static)					\
 	static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
-- 
cgit v1.2.3


From 3500efae4410454522697c94c23fc40323c0cee9 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Mon, 19 Oct 2015 14:45:02 -0700
Subject: rcu: Remove rcu_user_hooks_switch

Because there are neither uses nor intended uses for the
rcu_user_hooks_switch() function that was orginally intended
for nohz use, this commit removes it.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47ee16f..b5d48bd56e3f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -360,8 +360,6 @@ void rcu_user_exit(void);
 #else
 static inline void rcu_user_enter(void) { }
 static inline void rcu_user_exit(void) { }
-static inline void rcu_user_hooks_switch(struct task_struct *prev,
-					 struct task_struct *next) { }
 #endif /* CONFIG_NO_HZ_FULL */
 
 #ifdef CONFIG_RCU_NOCB_CPU
-- 
cgit v1.2.3


From f4bcfa1da6fdcbc7a0854a28603bffc3c5656332 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Tue, 23 Feb 2016 22:39:28 +0900
Subject: sched/wait: Fix wait_event_freezable() documentation

I noticed the comment label 'wait_event' was wrong.

Signed-off-by: Stafford Horne <shorne@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1456234768-24933-1-git-send-email-shorne@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ae71a769b89e..27d7a0ab5da3 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -338,7 +338,7 @@ do {									\
 			    schedule(); try_to_freeze())
 
 /**
- * wait_event - sleep (or freeze) until a condition gets true
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
-- 
cgit v1.2.3


From 2da897e51d7f23f872224218b9a4314503b0d360 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 23 Feb 2016 02:05:26 +0100
Subject: bpf: fix csum setting for bpf_set_tunnel_key

The fix in 35e2d1152b22 ("tunnels: Allow IPv6 UDP checksums to be correctly
controlled.") changed behavior for bpf_set_tunnel_key() when in use with
IPv6 and thus uncovered a bug that TUNNEL_CSUM needed to be set but wasn't.
As a result, the stack dropped ingress vxlan IPv6 packets, that have been
sent via eBPF through collect meta data mode due to checksum now being zero.

Since after LCO, we enable IPv4 checksum by default, so make that analogous
and only provide a flag BPF_F_ZERO_CSUM_TX for the user to turn it off in
IPv4 case.

Fixes: 35e2d1152b22 ("tunnels: Allow IPv6 UDP checksums to be correctly controlled.")
Fixes: c6c33454072f ("bpf: support ipv6 for bpf_skb_{set,get}_tunnel_key")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa6f8571de13..5df4881dea7b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -292,6 +292,9 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
-- 
cgit v1.2.3


From 0abefbaab4edbcec637e00fefcdeccb52797fe4f Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:12 +0000
Subject: genirq: Add new IPI irqdomain flags

These flags will be used to identify an IPI domain. We have two flavours of
IPI implementations:

IRQ_DOMAIN_FLAG_IPI_PER_CPU: Each CPU has its own virq and hwirq
IRQ_DOMAIN_FLAG_IPI_SINGLE : A single virq and hwirq for all CPUs

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-2-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 04579d9fbce4..9bb0a9cfc1c4 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -172,6 +172,12 @@ enum {
 	/* Core calls alloc/free recursive through the domain hierarchy. */
 	IRQ_DOMAIN_FLAG_AUTO_RECURSIVE	= (1 << 1),
 
+	/* Irq domain is an IPI domain with virq per cpu */
+	IRQ_DOMAIN_FLAG_IPI_PER_CPU	= (1 << 2),
+
+	/* Irq domain is an IPI domain with single virq */
+	IRQ_DOMAIN_FLAG_IPI_SINGLE	= (1 << 3),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
@@ -400,6 +406,22 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
 {
 	return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY;
 }
+
+static inline bool irq_domain_is_ipi(struct irq_domain *domain)
+{
+	return domain->flags &
+		(IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE);
+}
+
+static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU;
+}
+
+static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
+{
+	return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE;
+}
 #else	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 static inline void irq_domain_activate_irq(struct irq_data *data) { }
 static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
@@ -413,6 +435,21 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
 {
 	return false;
 }
+
+static inline bool irq_domain_is_ipi(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain)
+{
+	return false;
+}
+
+static inline bool irq_domain_is_ipi_single(struct irq_domain *domain)
+{
+	return false;
+}
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
 
 #else /* CONFIG_IRQ_DOMAIN */
-- 
cgit v1.2.3


From 29d5c8db26ad54592436508819ac617119306f96 Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:13 +0000
Subject: genirq: Add DOMAIN_BUS_IPI

We need a way to search and match IPI domains.

Using the new enum we can use irq_find_matching_host() to do that.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-3-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 9bb0a9cfc1c4..130e1c3117c3 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -74,6 +74,7 @@ enum irq_domain_bus_token {
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
+	DOMAIN_BUS_IPI,
 };
 
 /**
-- 
cgit v1.2.3


From 955bfe5912e7839abcc83694f06867535487404b Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:17 +0000
Subject: genirq: Add an extra comment about the use of affinity in
 irq_common_data

Affinity will have dual meaning depends on the type of the irq. If it is
a normal irq, it'll have the standard affinity meaning.

If it is an IPI, it will hold the mask of the cpus to which an IPI can be
sent.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-7-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c1c96786248..0817afd0d719 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -133,7 +133,9 @@ struct irq_domain;
  *			Use accessor functions to deal with it
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
- * @affinity:		IRQ affinity on SMP
+ * @affinity:		IRQ affinity on SMP. If this is an IPI
+ *			related irq, then this is the mask of the
+ *			CPUs to which an IPI can be sent.
  * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
-- 
cgit v1.2.3


From f256c9a0c54820ffef21b126f8226be2bece3dd7 Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:16 +0000
Subject: genirq: Add ipi_offset to irq_common_data

IPIs are always assumed to be consecutively allocated, hence virqs and hwirqs
can be inferred by using CPU id as an offset. But the first cpu doesn't always
have to start at offset 0. ipi_offset stores the position of the first cpu so
that we can easily calculate the virq or hwirq of an IPI associated with a
specific cpu.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-6-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0817afd0d719..a32b47fbf874 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -137,6 +137,7 @@ struct irq_domain;
  *			related irq, then this is the mask of the
  *			CPUs to which an IPI can be sent.
  * @msi_desc:		MSI descriptor
+ * @ipi_offset:		Offset of first IPI target cpu in @affinity. Optional.
  */
 struct irq_common_data {
 	unsigned int		state_use_accessors;
@@ -146,6 +147,9 @@ struct irq_common_data {
 	void			*handler_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
+#ifdef CONFIG_GENERIC_IRQ_IPI
+	unsigned int		ipi_offset;
+#endif
 };
 
 /**
-- 
cgit v1.2.3


From ac0a0cd266d1f21236d5975ca6bced9b377a2a6a Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:18 +0000
Subject: genirq: Make irq_domain_alloc_descs() non static

We will need to use this function to implement irq_reserve_ipi() later. So
make it non static and move the prototype to irqdomain.h to allow using it
outside irqdomain.c

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-8-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqdomain.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 130e1c3117c3..c466cc17b8e9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -213,6 +213,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
 extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
 						   enum irq_domain_bus_token bus_token);
 extern void irq_set_default_host(struct irq_domain *host);
+extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
+				  irq_hw_number_t hwirq, int node);
 
 static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
 {
-- 
cgit v1.2.3


From d17bf24e695290d3fe7943aca52ab48098a10653 Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:19 +0000
Subject: genirq: Add a new generic IPI reservation code to irq core

Add a generic mechanism to dynamically allocate an IPI. Depending on the
underlying implementation this creates either a single Linux irq or a
consective range of Linux irqs. The Linux irq is used later to send IPIs to
other CPUs.

[ tglx: Massaged the code and removed the 'consecutive mask' restriction for
  	the single IRQ case ]

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-9-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h       | 3 +++
 include/linux/irqdomain.h | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index a32b47fbf874..95f4f66f95f3 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -940,4 +940,7 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc,
 		return readl(gc->reg_base + reg_offset);
 }
 
+/* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */
+#define INVALID_HWIRQ	(~0UL)
+
 #endif /* _LINUX_IRQ_H */
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index c466cc17b8e9..ed48594e96d2 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -344,6 +344,11 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
 			const u32 *intspec, unsigned int intsize,
 			irq_hw_number_t *out_hwirq, unsigned int *out_type);
 
+/* IPI functions */
+unsigned int irq_reserve_ipi(struct irq_domain *domain,
+			     const struct cpumask *dest);
+void irq_destroy_ipi(unsigned int irq);
+
 /* V2 interfaces to support hierarchy IRQ domains. */
 extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain,
 						unsigned int virq);
-- 
cgit v1.2.3


From f9bce791ae2a1a10a965b30427f5507c1a77669f Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:20 +0000
Subject: genirq: Add a new function to get IPI reverse mapping

When dealing with coprocessors we need to find out the actual hwirqs values to
pass on to the firmware so that it knows what it needs to use to receive IPIs
from and send IPIs to Linux cpus.

[ tglx: Fixed the single hwirq IPI case. The hardware irq number does not
  	change due to the cpu number ]

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-10-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 95f4f66f95f3..10273dce058a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -942,5 +942,6 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc,
 
 /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */
 #define INVALID_HWIRQ	(~0UL)
+irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu);
 
 #endif /* _LINUX_IRQ_H */
-- 
cgit v1.2.3


From 34dc1ae101018dbb50e1d04e88aa89052802a7db Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:21 +0000
Subject: genirq: Add send_ipi callbacks to irq_chip

Introduce the new callbacks which can be used by the core code to implement a
generic IPI send mechanism.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-11-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 10273dce058a..3b3a5b817469 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -347,6 +347,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @irq_get_irqchip_state:	return the internal state of an interrupt
  * @irq_set_irqchip_state:	set the internal state of a interrupt
  * @irq_set_vcpu_affinity:	optional to target a vCPU in a virtual machine
+ * @ipi_send_single:	send a single IPI to destination cpus
+ * @ipi_send_mask:	send an IPI to destination cpus in cpumask
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -391,6 +393,9 @@ struct irq_chip {
 
 	int		(*irq_set_vcpu_affinity)(struct irq_data *data, void *vcpu_info);
 
+	void		(*ipi_send_single)(struct irq_data *data, unsigned int cpu);
+	void		(*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest);
+
 	unsigned long	flags;
 };
 
-- 
cgit v1.2.3


From 3b8e29a82dd16c1f2061e0b955a71cd36eeb061b Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:22 +0000
Subject: genirq: Implement ipi_send_mask/single()

Add APIs to send IPIs from driver and arch code.

We have different functions because we allow architecture code to cache the
irq descriptor to avoid lookups. Driver code has to use the irq number and is
subject to more restrictive checks.

[ tglx: Polish the implementation ]

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <ralf@linux-mips.org>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-12-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3b3a5b817469..d5ebd94822d2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -948,5 +948,9 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc,
 /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */
 #define INVALID_HWIRQ	(~0UL)
 irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu);
+int __ipi_send_single(struct irq_desc *desc, unsigned int cpu);
+int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest);
+int ipi_send_single(unsigned int virq, unsigned int cpu);
+int ipi_send_mask(unsigned int virq, const struct cpumask *dest);
 
 #endif /* _LINUX_IRQ_H */
-- 
cgit v1.2.3


From bb11cff327e54179c13446c4022ed4ed7d4871c7 Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Tue, 8 Dec 2015 13:20:28 +0000
Subject: MIPS: Make smp CMP, CPS and MT use the new generic IPI functions

This commit does several things to avoid breaking bisectability.

	1- Remove IPI init code from irqchip/mips-gic
	2- Implement the new irqchip->send_ipi() in irqchip/mips-gic
	3- Select GENERIC_IRQ_IPI Kconfig symbol for MIPS_GIC
	4- Change MIPS SMP to use the generic IPI implementation

Only the SMP variants that use GIC were converted as it's the only irqchip that
will have the support for generic IPI for now.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: <jason@lakedaemon.net>
Cc: <marc.zyngier@arm.com>
Cc: <jiang.liu@linux.intel.com>
Cc: <linux-mips@linux-mips.org>
Cc: <lisa.parratt@imgtec.com>
Cc: Qais Yousef <qsyousef@gmail.com>
Link: http://lkml.kernel.org/r/1449580830-23652-18-git-send-email-qais.yousef@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqchip/mips-gic.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h
index ce824db48d64..80f89e4a29ac 100644
--- a/include/linux/irqchip/mips-gic.h
+++ b/include/linux/irqchip/mips-gic.h
@@ -261,9 +261,6 @@ extern void gic_write_compare(cycle_t cnt);
 extern void gic_write_cpu_compare(cycle_t cnt, int cpu);
 extern void gic_start_count(void);
 extern void gic_stop_count(void);
-extern void gic_send_ipi(unsigned int intr);
-extern unsigned int plat_ipi_call_int_xlate(unsigned int);
-extern unsigned int plat_ipi_resched_int_xlate(unsigned int);
 extern int gic_get_c0_compare_int(void);
 extern int gic_get_c0_perfcount_int(void);
 extern int gic_get_c0_fdc_int(void);
-- 
cgit v1.2.3


From 13b35686e8b934ff78f59cef0c65fa3a43f8eeaf Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Date: Fri, 19 Feb 2016 09:46:37 +0100
Subject: wait.[ch]: Introduce the simple waitqueue (swait) implementation

The existing wait queue support has support for custom wake up call
backs, wake flags, wake key (passed to call back) and exclusive
flags that allow wakers to be tagged as exclusive, for limiting
the number of wakers.

In a lot of cases, none of these features are used, and hence we
can benefit from a slimmed down version that lowers memory overhead
and reduces runtime overhead.

The concept originated from -rt, where waitqueues are a constant
source of trouble, as we can't convert the head lock to a raw
spinlock due to fancy and long lasting callbacks.

With the removal of custom callbacks, we can use a raw lock for
queue list manipulations, hence allowing the simple wait support
to be used in -rt.

[Patch is from PeterZ which is based on Thomas version. Commit message is
 written by Paul G.
 Daniel:  - Fixed some compile issues
 	  - Added non-lazy implementation of swake_up_locked as suggested
	     by Boqun Feng.]

Originally-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-rt-users@vger.kernel.org
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1455871601-27484-2-git-send-email-wagi@monom.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/swait.h | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 include/linux/swait.h

(limited to 'include')

diff --git a/include/linux/swait.h b/include/linux/swait.h
new file mode 100644
index 000000000000..c1f9c62a8a50
--- /dev/null
+++ b/include/linux/swait.h
@@ -0,0 +1,172 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <asm/current.h>
+
+/*
+ * Simple wait queues
+ *
+ * While these are very similar to the other/complex wait queues (wait.h) the
+ * most important difference is that the simple waitqueue allows for
+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
+ *
+ * In order to make this so, we had to drop a fair number of features of the
+ * other waitqueue code; notably:
+ *
+ *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
+ *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
+ *    sleeper state.
+ *
+ *  - the exclusive mode; because this requires preserving the list order
+ *    and this is hard.
+ *
+ *  - custom wake functions; because you cannot give any guarantees about
+ *    random code.
+ *
+ * As a side effect of this; the data structures are slimmer.
+ *
+ * One would recommend using this wait queue where possible.
+ */
+
+struct task_struct;
+
+struct swait_queue_head {
+	raw_spinlock_t		lock;
+	struct list_head	task_list;
+};
+
+struct swait_queue {
+	struct task_struct	*task;
+	struct list_head	task_list;
+};
+
+#define __SWAITQUEUE_INITIALIZER(name) {				\
+	.task		= current,					\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAITQUEUE(name)					\
+	struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
+
+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {				\
+	.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),		\
+	.task_list	= LIST_HEAD_INIT((name).task_list),		\
+}
+
+#define DECLARE_SWAIT_QUEUE_HEAD(name)					\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+				    struct lock_class_key *key);
+
+#define init_swait_queue_head(q)				\
+	do {							\
+		static struct lock_class_key __key;		\
+		__init_swait_queue_head((q), #q, &__key);	\
+	} while (0)
+
+#ifdef CONFIG_LOCKDEP
+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)			\
+	({ init_swait_queue_head(&name); name; })
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)			\
+	DECLARE_SWAIT_QUEUE_HEAD(name)
+#endif
+
+static inline int swait_active(struct swait_queue_head *q)
+{
+	return !list_empty(&q->task_list);
+}
+
+extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_all(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q);
+
+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
+
+extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+
+/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+#define ___swait_event(wq, condition, state, ret, cmd)			\
+({									\
+	struct swait_queue __wait;					\
+	long __ret = ret;						\
+									\
+	INIT_LIST_HEAD(&__wait.task_list);				\
+	for (;;) {							\
+		long __int = prepare_to_swait_event(&wq, &__wait, state);\
+									\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
+			break;						\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_swait(&wq, &__wait);					\
+	__ret;								\
+})
+
+#define __swait_event(wq, condition)					\
+	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
+			    schedule())
+
+#define swait_event(wq, condition)					\
+do {									\
+	if (condition)							\
+		break;							\
+	__swait_event(wq, condition);					\
+} while (0)
+
+#define __swait_event_timeout(wq, condition, timeout)			\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_UNINTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_timeout(wq, condition, timeout)			\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_timeout(wq, condition, timeout);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible(wq, condition)			\
+	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
+		      schedule())
+
+#define swait_event_interruptible(wq, condition)			\
+({									\
+	int __ret = 0;							\
+	if (!(condition))						\
+		__ret = __swait_event_interruptible(wq, condition);	\
+	__ret;								\
+})
+
+#define __swait_event_interruptible_timeout(wq, condition, timeout)	\
+	___swait_event(wq, ___wait_cond_timeout(condition),		\
+		      TASK_INTERRUPTIBLE, timeout,			\
+		      __ret = schedule_timeout(__ret))
+
+#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+({									\
+	long __ret = timeout;						\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __swait_event_interruptible_timeout(wq,		\
+						condition, timeout);	\
+	__ret;								\
+})
+
+#endif /* _LINUX_SWAIT_H */
-- 
cgit v1.2.3


From 8577370fb0cbe88266b7583d8d3b9f43ced077a0 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 19 Feb 2016 09:46:39 +0100
Subject: KVM: Use simple waitqueue for vcpu->wq

The problem:

On -rt, an emulated LAPIC timer instances has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT
are sleepable locks. Therefore, waking up a waitqueue
waiter involves locking a sleeping lock, which
is not allowed from hard interrupt context.

cyclictest command line:

This patch reduces the average latency in my tests from 14us to 11us.

Daniel writes:
Paolo asked for numbers from kvm-unit-tests/tscdeadline_latency
benchmark on mainline. The test was run 1000 times on
tip/sched/core 4.4.0-rc8-01134-g0905f04:

  ./x86-run x86/tscdeadline_latency.flat -cpu host

with idle=poll.

The test seems not to deliver really stable numbers though most of
them are smaller. Paolo write:

"Anything above ~10000 cycles means that the host went to C1 or
lower---the number means more or less nothing in that case.

The mean shows an improvement indeed."

Before:

               min             max         mean           std
count  1000.000000     1000.000000  1000.000000   1000.000000
mean   5162.596000  2019270.084000  5824.491541  20681.645558
std      75.431231   622607.723969    89.575700   6492.272062
min    4466.000000    23928.000000  5537.926500    585.864966
25%    5163.000000  1613252.750000  5790.132275  16683.745433
50%    5175.000000  2281919.000000  5834.654000  23151.990026
75%    5190.000000  2382865.750000  5861.412950  24148.206168
max    5228.000000  4175158.000000  6254.827300  46481.048691

After
               min            max         mean           std
count  1000.000000     1000.00000  1000.000000   1000.000000
mean   5143.511000  2076886.10300  5813.312474  21207.357565
std      77.668322   610413.09583    86.541500   6331.915127
min    4427.000000    25103.00000  5529.756600    559.187707
25%    5148.000000  1691272.75000  5784.889825  17473.518244
50%    5160.000000  2308328.50000  5832.025000  23464.837068
75%    5172.000000  2393037.75000  5853.177675  24223.969976
max    5222.000000  3922458.00000  6186.720500  42520.379830

[Patch was originaly based on the swait implementation found in the -rt
 tree. Daniel ported it to mainline's version and gathered the
 benchmark numbers for tscdeadline_latency test.]

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-rt-users@vger.kernel.org
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1455871601-27484-4-git-send-email-wagi@monom.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/kvm_host.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 861f690aa791..5276fe0916fc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -25,6 +25,7 @@
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
+#include <linux/swait.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -218,7 +219,7 @@ struct kvm_vcpu {
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	unsigned char fpu_counter;
-	wait_queue_head_t wq;
+	struct swait_queue_head wq;
 	struct pid *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -782,7 +783,7 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 }
 #endif
 
-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
 	return vcpu->arch.wqp;
-- 
cgit v1.2.3


From 4ee34ea3a12396f35b26d90a094c75db95080baa Mon Sep 17 00:00:00 2001
From: Harvey Hunt <harvey.hunt@imgtec.com>
Date: Wed, 24 Feb 2016 15:16:43 +0000
Subject: libata: Align ata_device's id on a cacheline

The id buffer in ata_device is a DMA target, but it isn't explicitly
cacheline aligned. Due to this, adjacent fields can be overwritten with
stale data from memory on non coherent architectures. As a result, the
kernel is sometimes unable to communicate with an ATA device.

Fix this by ensuring that the id buffer is cacheline aligned.

This issue is similar to that fixed by Commit 84bda12af31f
("libata: align ap->sector_buf").

Signed-off-by: Harvey Hunt <harvey.hunt@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: <stable@vger.kernel.org> # 2.6.18
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index bec2abbd7ab2..2c4ebef79d0c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -720,7 +720,7 @@ struct ata_device {
 	union {
 		u16		id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
-	};
+	} ____cacheline_aligned;
 
 	/* DEVSLP Timing Variables from Identify Device Data Log */
 	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
-- 
cgit v1.2.3


From f4833a519aec793cf8349bf479589d37473ef6a7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 24 Feb 2016 17:38:14 +0100
Subject: ASoC: trace: fix printing jack name

After a change to the snd_jack structure, the 'name' member
is no longer available in all configurations, which results in a
build failure in the tracing code:

include/trace/events/asoc.h: In function 'trace_event_raw_event_snd_soc_jack_report':
include/trace/events/asoc.h:240:32: error: 'struct snd_jack' has no member named 'name'

The name field is normally initialized from the card shortname and
the jack "id" field:

        snprintf(jack->name, sizeof(jack->name), "%s %s",
                 card->shortname, jack->id);

This changes the tracing output to just contain the 'id' by
itself, which slightly changes the output format but avoids the
link error and is hopefully still enough to see what is going on.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: fe0d128c57bf ("ALSA: jack: Allow building the jack layer without input device")
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/trace/events/asoc.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h
index 317a1ed2f4ac..9130dd5a184a 100644
--- a/include/trace/events/asoc.h
+++ b/include/trace/events/asoc.h
@@ -231,13 +231,13 @@ TRACE_EVENT(snd_soc_jack_report,
 	TP_ARGS(jack, mask, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		mask			)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->mask = mask;
 		__entry->val = val;
 	),
@@ -253,12 +253,12 @@ TRACE_EVENT(snd_soc_jack_notify,
 	TP_ARGS(jack, val),
 
 	TP_STRUCT__entry(
-		__string(	name,		jack->jack->name	)
+		__string(	name,		jack->jack->id		)
 		__field(	int,		val			)
 	),
 
 	TP_fast_assign(
-		__assign_str(name, jack->jack->name);
+		__assign_str(name, jack->jack->id);
 		__entry->val = val;
 	),
 
-- 
cgit v1.2.3


From 7aca0c07207385cca76025cc85231519935722b9 Mon Sep 17 00:00:00 2001
From: Alexander Kuleshov <kuleshovmail@gmail.com>
Date: Fri, 26 Feb 2016 19:14:13 -0800
Subject: clocksource: Introduce clocksource_freq2mult()

The clocksource_khz2mult() and clocksource_hz2mult() share similar
code wihch calculates a mult from the given frequency. Both implementations
in differ only in value of a frequency. This patch introduces the
clocksource_freq2mult() helper with generic implementation of
mult calculation to prevent code duplication.

Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Link: http://lkml.kernel.org/r/1456542854-22104-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 6013021a3b39..a307bf62974f 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -118,6 +118,23 @@ struct clocksource {
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
 
+static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from)
+{
+	/*  freq = cyc/from
+	 *  mult/2^shift  = ns/cyc
+	 *  mult = ns/cyc * 2^shift
+	 *  mult = from/freq * 2^shift
+	 *  mult = from * 2^shift / freq
+	 *  mult = (from<<shift) / freq
+	 */
+	u64 tmp = ((u64)from) << shift_constant;
+
+	tmp += freq/2; /* round for do_div */
+	do_div(tmp, freq);
+
+	return (u32)tmp;
+}
+
 /**
  * clocksource_khz2mult - calculates mult from khz and shift
  * @khz:		Clocksource frequency in KHz
@@ -128,19 +145,7 @@ struct clocksource {
  */
 static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant)
 {
-	/*  khz = cyc/(Million ns)
-	 *  mult/2^shift  = ns/cyc
-	 *  mult = ns/cyc * 2^shift
-	 *  mult = 1Million/khz * 2^shift
-	 *  mult = 1000000 * 2^shift / khz
-	 *  mult = (1000000<<shift) / khz
-	 */
-	u64 tmp = ((u64)1000000) << shift_constant;
-
-	tmp += khz/2; /* round for do_div */
-	do_div(tmp, khz);
-
-	return (u32)tmp;
+	return clocksource_freq2mult(khz, shift_constant, NSEC_PER_MSEC);
 }
 
 /**
@@ -154,19 +159,7 @@ static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant)
  */
 static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
 {
-	/*  hz = cyc/(Billion ns)
-	 *  mult/2^shift  = ns/cyc
-	 *  mult = ns/cyc * 2^shift
-	 *  mult = 1Billion/hz * 2^shift
-	 *  mult = 1000000000 * 2^shift / hz
-	 *  mult = (1000000000<<shift) / hz
-	 */
-	u64 tmp = ((u64)1000000000) << shift_constant;
-
-	tmp += hz/2; /* round for do_div */
-	do_div(tmp, hz);
-
-	return (u32)tmp;
+	return clocksource_freq2mult(hz, shift_constant, NSEC_PER_SEC);
 }
 
 /**
-- 
cgit v1.2.3


From 54d751d4ad357c817907fe89db3222b97ff66db3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Feb 2016 22:19:14 +0000
Subject: perf: Allow storage of PMU private data in event

For PMUs which are not per CPU, but e.g. per package/socket, we want to be
able to store a reference to the underlying per package/socket facility in the
event at init time so we can avoid magic storage constructs in the PMU driver.

This allows us to get rid of the per CPU dance in the intel uncore and RAPL
drivers and avoids a lookup of the per package data in the perf hotpath.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Harish Chegondi <harish.chegondi@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20160222221011.364140369@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5c5a3fa2c81..a9d8cab18b00 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -468,6 +468,7 @@ struct perf_event {
 	int				group_flags;
 	struct perf_event		*group_leader;
 	struct pmu			*pmu;
+	void				*pmu_private;
 
 	enum perf_event_active_state	state;
 	unsigned int			attach_state;
-- 
cgit v1.2.3


From ff77e468535987b3d21b7bd4da15608ea3ce7d0b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 18 Jan 2016 15:27:07 +0100
Subject: sched/rt: Fix PI handling vs. sched_setscheduler()

Andrea Parri reported:

> I found that the following scenario (with CONFIG_RT_GROUP_SCHED=y) is not
> handled correctly:
>
>     T1 (prio = 20)
>        lock(rtmutex);
>
>     T2 (prio = 20)
>        blocks on rtmutex  (rt_nr_boosted = 0 on T1's rq)
>
>     T1 (prio = 20)
>        sys_set_scheduler(prio = 0)
>           [new_effective_prio == oldprio]
>           T1 prio = 20    (rt_nr_boosted = 0 on T1's rq)
>
> The last step is incorrect as T1 is now boosted (c.f., rt_se_boosted());
> in particular, if we continue with
>
>    T1 (prio = 20)
>       unlock(rtmutex)
>          wakeup(T2)
>          adjust_prio(T1)
>             [prio != rt_mutex_getprio(T1)]
>	    dequeue(T1)
>	       rt_nr_boosted = (unsigned long)(-1)
>	       ...
>             T1 prio = 0
>
> then we end up leaving rt_nr_boosted in an "inconsistent" state.
>
> The simple program attached could reproduce the previous scenario; note
> that, as a consequence of the presence of this state, the "assertion"
>
>     WARN_ON(!rt_nr_running && rt_nr_boosted)
>
> from dec_rt_group() may trigger.

So normally we dequeue/enqueue tasks in sched_setscheduler(), which
would ensure the accounting stays correct. However in the early PI path
we fail to do so.

So this was introduced at around v3.14, by:

  c365c292d059 ("sched: Consider pi boosting in setscheduler()")

which fixed another problem exactly because that dequeue/enqueue, joy.

Fix this by teaching rt about DEQUEUE_SAVE/ENQUEUE_RESTORE and have it
preserve runqueue location with that option. This requires decoupling
the on_rt_rq() state from being on the list.

In order to allow for explicit movement during the SAVE/RESTORE,
introduce {DE,EN}QUEUE_MOVE. We still must use SAVE/RESTORE in these
cases to preserve other invariants.

Respecting the SAVE/RESTORE flags also has the (nice) side-effect that
things like sys_nice()/sys_sched_setaffinity() also do not reorder
FIFO tasks (whereas they used to before this patch).

Reported-by: Andrea Parri <parri.andrea@gmail.com>
Tested-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a292c4b7e94c..87e5f9886ac8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1293,6 +1293,8 @@ struct sched_rt_entity {
 	unsigned long timeout;
 	unsigned long watchdog_stamp;
 	unsigned int time_slice;
+	unsigned short on_rq;
+	unsigned short on_list;
 
 	struct sched_rt_entity *back;
 #ifdef CONFIG_RT_GROUP_SCHED
-- 
cgit v1.2.3


From f904f58263e1df5f70feb8b283f4bbe662847334 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 26 Feb 2016 14:54:56 +0100
Subject: sched/debug: Fix preempt_disable_ip recording for preempt_disable()

The preempt_disable() invokes preempt_count_add() which saves the caller
in ->preempt_disable_ip. It uses CALLER_ADDR1 which does not look for
its caller but for the parent of the caller. Which means we get the correct
caller for something like spin_lock() unless the architectures inlines
those invocations. It is always wrong for preempt_disable() or
local_bh_disable().

This patch makes the function get_lock_parent_ip() which tries
CALLER_ADDR0,1,2 if the former is a locking function.
This seems to record the preempt_disable() caller properly for
preempt_disable() itself as well as for get_cpu_var() or
local_bh_disable().

Steven asked for the get_parent_ip() -> get_lock_parent_ip() rename.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20160226135456.GB18244@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ftrace.h | 12 ++++++++++++
 include/linux/sched.h  |  2 --
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c2b340e23f62..6d9df3f7e334 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -713,6 +713,18 @@ static inline void __ftrace_enabled_restore(int enabled)
 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
 
+static inline unsigned long get_lock_parent_ip(void)
+{
+	unsigned long addr = CALLER_ADDR0;
+
+	if (!in_lock_functions(addr))
+		return addr;
+	addr = CALLER_ADDR1;
+	if (!in_lock_functions(addr))
+		return addr;
+	return CALLER_ADDR2;
+}
+
 #ifdef CONFIG_IRQSOFF_TRACER
   extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
   extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87e5f9886ac8..9519b66fc21f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -182,8 +182,6 @@ extern void update_cpu_load_nohz(int active);
 static inline void update_cpu_load_nohz(int active) { }
 #endif
 
-extern unsigned long get_parent_ip(unsigned long addr);
-
 extern void dump_cpu_task(int cpu);
 
 struct seq_file;
-- 
cgit v1.2.3


From b82e530290a0437522720becaf4abdf8ca4cb7d2 Mon Sep 17 00:00:00 2001
From: Dan Streetman <dan.streetman@canonical.com>
Date: Fri, 19 Feb 2016 13:49:27 -0500
Subject: locking/qspinlock: Move __ARCH_SPIN_LOCK_UNLOCKED to
 qspinlock_types.h

Move the __ARCH_SPIN_LOCK_UNLOCKED definition from qspinlock.h into
qspinlock_types.h.

The definition of __ARCH_SPIN_LOCK_UNLOCKED comes from the build arch's
include files; but on x86 when CONFIG_QUEUED_SPINLOCKS=y, it just
it's defined in asm-generic/qspinlock.h.  In most cases, this doesn't
matter because linux/spinlock.h includes asm/spinlock.h, which for x86
includes asm-generic/qspinlock.h.  However, any code that only includes
linux/mutex.h will break, because it only includes asm/spinlock_types.h.

For example, this breaks systemtap, which only includes mutex.h.

Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Waiman Long <Waiman.Long@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1455907767-17821-1-git-send-email-dan.streetman@canonical.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qspinlock.h       | 5 -----
 include/asm-generic/qspinlock_types.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 39e1cb201b8e..35a52a880b2f 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -119,11 +119,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 }
 #endif
 
-/*
- * Initializier
- */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
-
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 85f888e86761..034acd0c4956 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -32,6 +32,11 @@ typedef struct qspinlock {
 	atomic_t	val;
 } arch_spinlock_t;
 
+/*
+ * Initializier
+ */
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+
 /*
  * Bitfields in the atomic value:
  *
-- 
cgit v1.2.3


From a528aca7f359f4b0b1d72ae406097e491a5ba9ea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 29 Feb 2016 12:12:46 -0500
Subject: use ->d_seq to get coherency between ->d_inode and ->d_flags

Games with ordering and barriers are way too brittle.  Just
bump ->d_seq before and after updating ->d_inode and ->d_flags
type bits, so that verifying ->d_seq would guarantee they are
coherent.

Cc: stable@vger.kernel.org # v3.13+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/dcache.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7781ce110503..c4b5f4b3f8f8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -409,9 +409,7 @@ static inline bool d_mountpoint(const struct dentry *dentry)
  */
 static inline unsigned __d_entry_type(const struct dentry *dentry)
 {
-	unsigned type = READ_ONCE(dentry->d_flags);
-	smp_rmb();
-	return type & DCACHE_ENTRY_TYPE;
+	return dentry->d_flags & DCACHE_ENTRY_TYPE;
 }
 
 static inline bool d_is_miss(const struct dentry *dentry)
-- 
cgit v1.2.3


From 090e77c391dd983c8945b8e2e16d09f378d2e334 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:23 +0000
Subject: cpu/hotplug: Restructure FROZEN state handling

There are only a few callbacks which really care about FROZEN
vs. !FROZEN. No need to have extra states for this.

Publish the frozen state in an extra variable which is updated under
the hotplug lock and let the users interested deal with it w/o
imposing that extra state checks on everyone.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182340.334912357@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d2ca8c38f9c4..f2fb54938ee6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -118,6 +118,7 @@ enum {
 
 
 #ifdef CONFIG_SMP
+extern bool cpuhp_tasks_frozen;
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
 #define cpu_notifier(fn, pri) {					\
@@ -177,6 +178,7 @@ extern void cpu_maps_update_done(void);
 #define cpu_notifier_register_done	cpu_maps_update_done
 
 #else	/* CONFIG_SMP */
+#define cpuhp_tasks_frozen	0
 
 #define cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 #define __cpu_notifier(fn, pri)	do { (void)(fn); } while (0)
-- 
cgit v1.2.3


From 5ba9ac8e2c45ab165e5b4a246f4821d319656e9d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:27 +0000
Subject: cpu/hotplug: Add tracepoints

We want to trace the hotplug machinery. Add tracepoints to track the
invocation of callbacks and their result.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182340.593563875@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/trace/events/cpuhp.h | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 include/trace/events/cpuhp.h

(limited to 'include')

diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
new file mode 100644
index 000000000000..a72bd93ec7e5
--- /dev/null
+++ b/include/trace/events/cpuhp.h
@@ -0,0 +1,66 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cpuhp
+
+#if !defined(_TRACE_CPUHP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CPUHP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(cpuhp_enter,
+
+	TP_PROTO(unsigned int cpu,
+		 int target,
+		 int idx,
+		 int (*fun)(unsigned int)),
+
+	TP_ARGS(cpu, target, idx, fun),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu		)
+		__field( int,		target		)
+		__field( int,		idx		)
+		__field( void *,	fun		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->target	= target;
+		__entry->idx	= idx;
+		__entry->fun	= fun;
+	),
+
+	TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+		  __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
+TRACE_EVENT(cpuhp_exit,
+
+	TP_PROTO(unsigned int cpu,
+		 int state,
+		 int idx,
+		 int ret),
+
+	TP_ARGS(cpu, state, idx, ret),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu		)
+		__field( int,		state		)
+		__field( int,		idx		)
+		__field( int,		ret		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->state	= state;
+		__entry->idx	= idx;
+		__entry->ret	= ret;
+	),
+
+	TP_printk(" cpu: %04u  state: %3d step: %3d ret: %d",
+		  __entry->cpu, __entry->state, __entry->idx,  __entry->ret)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From cff7d378d3fdbb53db9b6e2578b14855f401cd41 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:28 +0000
Subject: cpu/hotplug: Convert to a state machine for the control processor

Move the split out steps into a callback array and let the cpu_up/down
code iterate through the array functions. For now most of the
callbacks are asymmetric to resemble the current hotplug maze.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182340.671816690@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h        |  9 ++++-----
 include/linux/cpuhotplug.h | 13 +++++++++++++
 2 files changed, 17 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/cpuhotplug.h

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f2fb54938ee6..78989f20420f 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,6 +16,7 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
 
 struct device;
 struct device_node;
@@ -27,6 +28,9 @@ struct cpu {
 	struct device dev;
 };
 
+extern void boot_cpu_init(void);
+extern void boot_cpu_state_init(void);
+
 extern int register_cpu(struct cpu *cpu, int num);
 extern struct device *get_cpu_device(unsigned cpu);
 extern bool cpu_is_hotpluggable(unsigned cpu);
@@ -267,11 +271,6 @@ static inline int disable_nonboot_cpus(void) { return 0; }
 static inline void enable_nonboot_cpus(void) {}
 #endif /* !CONFIG_PM_SLEEP_SMP */
 
-enum cpuhp_state {
-	CPUHP_OFFLINE,
-	CPUHP_ONLINE,
-};
-
 void cpu_startup_entry(enum cpuhp_state state);
 
 void cpu_idle_poll_ctrl(bool enable);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
new file mode 100644
index 000000000000..d55c9e64acd7
--- /dev/null
+++ b/include/linux/cpuhotplug.h
@@ -0,0 +1,13 @@
+#ifndef __CPUHOTPLUG_H
+#define __CPUHOTPLUG_H
+
+enum cpuhp_state {
+	CPUHP_OFFLINE,
+	CPUHP_CREATE_THREADS,
+	CPUHP_NOTIFY_PREPARE,
+	CPUHP_BRINGUP_CPU,
+	CPUHP_NOTIFY_ONLINE,
+	CPUHP_ONLINE,
+};
+
+#endif
-- 
cgit v1.2.3


From 4baa0afc6719cbf36a1e08551484a641926b3fd1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:29 +0000
Subject: cpu/hotplug: Convert the hotplugged cpu work to a state machine

Move the functions which need to run on the hotplugged processor into
a state machine array and let the code iterate through these functions.

In a later state, this will grow synchronization points between the
control processor and the hotplugged processor, so we can move the
various architecture implementations of the synchronizations to the
core.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182340.770651526@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d55c9e64acd7..d9303cca83d3 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -6,6 +6,10 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_NOTIFY_PREPARE,
 	CPUHP_BRINGUP_CPU,
+	CPUHP_AP_OFFLINE,
+	CPUHP_AP_NOTIFY_STARTING,
+	CPUHP_AP_ONLINE,
+	CPUHP_TEARDOWN_CPU,
 	CPUHP_NOTIFY_ONLINE,
 	CPUHP_ONLINE,
 };
-- 
cgit v1.2.3


From 5b7aa87e0482be768486e0c2277aa4122487eb9d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:33 +0000
Subject: cpu/hotplug: Implement setup/removal interface

Implement function which allow to setup/remove hotplug state callbacks.

The default behaviour for setup is to call the startup function for this state
for (or on) all cpus which have a hotplug state >= the installed state.

The default behaviour for removal is to call the teardown function for this
state for (or on) all cpus which have a hotplug state >= the installed state.

This includes rollback to the previous state in case of failure.

A special state is CPUHP_ONLINE_DYN. Its for dynamically registering a hotplug
callback pair. This is for drivers which have no dependencies to avoid that we
need to allocate CPUHP states for each of them

For both setup and remove helper functions are provided, which prevent the
core to issue the callbacks. This simplifies the conversion of existing
hotplug notifiers.

[ Dynamic registering implemented by Sebastian Siewior ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.103464877@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d9303cca83d3..29935261b26d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -11,7 +11,74 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_NOTIFY_ONLINE,
+	CPUHP_ONLINE_DYN,
+	CPUHP_ONLINE_DYN_END		= CPUHP_ONLINE_DYN + 30,
 	CPUHP_ONLINE,
 };
 
+int __cpuhp_setup_state(enum cpuhp_state state,	const char *name, bool invoke,
+			int (*startup)(unsigned int cpu),
+			int (*teardown)(unsigned int cpu));
+
+/**
+ * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback (will be used in debug output)
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Installs the callback functions and invokes the startup callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline int cpuhp_setup_state(enum cpuhp_state state,
+				    const char *name,
+				    int (*startup)(unsigned int cpu),
+				    int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state(state, name, true, startup, teardown);
+}
+
+/**
+ * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
+ *			       callbacks
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback.
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Same as @cpuhp_setup_state except that no calls are executed are invoked
+ * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n.
+ */
+static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
+					    const char *name,
+					    int (*startup)(unsigned int cpu),
+					    int (*teardown)(unsigned int cpu))
+{
+	return __cpuhp_setup_state(state, name, false, startup, teardown);
+}
+
+void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
+
+/**
+ * cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
+ * @state:	The state for which the calls are removed
+ *
+ * Removes the callback functions and invokes the teardown callback on
+ * the present cpus which have already reached the @state.
+ */
+static inline void cpuhp_remove_state(enum cpuhp_state state)
+{
+	__cpuhp_remove_state(state, true);
+}
+
+/**
+ * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking
+ *				teardown
+ * @state:	The state for which the calls are removed
+ */
+static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
+{
+	__cpuhp_remove_state(state, false);
+}
+
 #endif
-- 
cgit v1.2.3


From 949338e35131c551f7bf54f48a2e3a227af6721b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:35 +0000
Subject: cpu/hotplug: Move scheduler cpu_online notifier to hotplug core

Move the scheduler cpu online notifier part to the hotplug core. This is
anyway the highest priority callback and we need that functionality right now
for the next changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.200791046@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 29935261b26d..2f2e5d9711c4 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -10,6 +10,7 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
+	CPUHP_CPU_SET_ACTIVE,
 	CPUHP_NOTIFY_ONLINE,
 	CPUHP_ONLINE_DYN,
 	CPUHP_ONLINE_DYN_END		= CPUHP_ONLINE_DYN + 30,
-- 
cgit v1.2.3


From 931ef163309ee955611f287dc65248b39a65fc9d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:36 +0000
Subject: cpu/hotplug: Unpark smpboot threads from the state machine

Handle the smpboot threads in the state machine.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.295777684@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h        | 7 +------
 include/linux/cpuhotplug.h | 1 +
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 78989f20420f..83f35767016d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -78,7 +78,7 @@ enum {
 	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
 	CPU_PRI_MIGRATION	= 10,
-	CPU_PRI_SMPBOOT		= 9,
+
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
 	CPU_PRI_WORKQUEUE_DOWN	= -5,
@@ -172,7 +172,6 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb)
 }
 #endif
 
-void smpboot_thread_init(void);
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
@@ -221,10 +220,6 @@ static inline void cpu_notifier_register_done(void)
 {
 }
 
-static inline void smpboot_thread_init(void)
-{
-}
-
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2f2e5d9711c4..38679106fddd 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -11,6 +11,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_CPU_SET_ACTIVE,
+	CPUHP_SMPBOOT_THREADS,
 	CPUHP_NOTIFY_ONLINE,
 	CPUHP_ONLINE_DYN,
 	CPUHP_ONLINE_DYN_END		= CPUHP_ONLINE_DYN + 30,
-- 
cgit v1.2.3


From 1cf4f629d9d246519a1e76c021806f2a51ddba4d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:39 +0000
Subject: cpu/hotplug: Move online calls to hotplugged cpu

Let the hotplugged cpu invoke the setup/teardown callbacks
(CPU_ONLINE/CPU_DOWN_PREPARE) itself.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.536364371@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 38679106fddd..8a715bb1e192 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -11,10 +11,12 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_CPU_SET_ACTIVE,
-	CPUHP_SMPBOOT_THREADS,
-	CPUHP_NOTIFY_ONLINE,
-	CPUHP_ONLINE_DYN,
-	CPUHP_ONLINE_DYN_END		= CPUHP_ONLINE_DYN + 30,
+	CPUHP_KICK_AP_THREAD,
+	CPUHP_BP_ONLINE,
+	CPUHP_AP_SMPBOOT_THREADS,
+	CPUHP_AP_NOTIFY_ONLINE,
+	CPUHP_AP_ONLINE_DYN,
+	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
 	CPUHP_ONLINE,
 };
 
-- 
cgit v1.2.3


From fc6d73d67436e7784758a831227bd019547a3f73 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:40 +0000
Subject: arch/hotplug: Call into idle with a proper state

Let the non boot cpus call into idle with the corresponding hotplug state, so
the hotplug core can handle the further bringup. That's a first step to
convert the boot side of the hotplugged cpus to do all the synchronization
with the other side through the state machine. For now it'll only start the
hotplug thread and kick the full bringup of the cpu.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.614102639@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8a715bb1e192..4aa263adc536 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -13,6 +13,7 @@ enum cpuhp_state {
 	CPUHP_CPU_SET_ACTIVE,
 	CPUHP_KICK_AP_THREAD,
 	CPUHP_BP_ONLINE,
+	CPUHP_AP_ONLINE_IDLE,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
-- 
cgit v1.2.3


From 8df3e07e7f21f2ed8d001e6fabf9505946b438aa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:41 +0000
Subject: cpu/hotplug: Let upcoming cpu bring itself fully up

Let the upcoming cpu kick the hotplug thread and let itself complete the
bringup. That way the controll side can just wait for the completion or later
when we made the hotplug machinery async not care at all.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.697655464@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpuhotplug.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4aa263adc536..ad5d7fcb0130 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -10,9 +10,6 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
-	CPUHP_CPU_SET_ACTIVE,
-	CPUHP_KICK_AP_THREAD,
-	CPUHP_BP_ONLINE,
 	CPUHP_AP_ONLINE_IDLE,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_NOTIFY_ONLINE,
@@ -86,4 +83,10 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
 	__cpuhp_remove_state(state, false);
 }
 
+#ifdef CONFIG_SMP
+void cpuhp_online_idle(enum cpuhp_state state);
+#else
+static inline void cpuhp_online_idle(enum cpuhp_state state) { }
+#endif
+
 #endif
-- 
cgit v1.2.3


From e69aab13117efc1987620090e539b4ebeb33a04c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:43 +0000
Subject: cpu/hotplug: Make wait for dead cpu completion based

Kill the busy spinning on the control side and just wait for the hotplugged
cpu to tell that it reached the dead state.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.776157858@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h        | 5 +++--
 include/linux/cpuhotplug.h | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 83f35767016d..91a48d1b4ca0 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -276,14 +276,15 @@ void arch_cpu_idle_enter(void);
 void arch_cpu_idle_exit(void);
 void arch_cpu_idle_dead(void);
 
-DECLARE_PER_CPU(bool, cpu_dead_idle);
-
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
 bool cpu_report_death(void);
+void cpuhp_report_idle_dead(void);
+#else
+static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ad5d7fcb0130..5d68e15e46b7 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -6,6 +6,7 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_NOTIFY_PREPARE,
 	CPUHP_BRINGUP_CPU,
+	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
-- 
cgit v1.2.3


From 27d50c7eeb0f03c3d3ca72aac4d2dd487ca1f3f0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Feb 2016 18:43:44 +0000
Subject: rcu: Make CPU_DYING_IDLE an explicit call

Make the RCU CPU_DYING_IDLE callback an explicit function call, so it gets
invoked at the proper place.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arch@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Rafael Wysocki <rafael.j.wysocki@intel.com>
Cc: "Srivatsa S. Bhat" <srivatsa@mit.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Turner <pjt@google.com>
Link: http://lkml.kernel.org/r/20160226182341.870167933@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/cpu.h      | 4 +---
 include/linux/notifier.h | 2 ++
 include/linux/rcupdate.h | 4 +---
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 91a48d1b4ca0..f9b1fab4388a 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -101,9 +101,7 @@ enum {
 					* Called on the new cpu, just before
 					* enabling interrupts. Must not sleep,
 					* must not fail */
-#define CPU_DYING_IDLE		0x000B /* CPU (unsigned)v dying, reached
-					* idle loop. */
-#define CPU_BROKEN		0x000C /* CPU (unsigned)v did not die properly,
+#define CPU_BROKEN		0x000B /* CPU (unsigned)v did not die properly,
 					* perhaps due to preemption. */
 
 /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index d14a4c362465..4149868de4e6 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -47,6 +47,8 @@
  * runtime initialization.
  */
 
+struct notifier_block;
+
 typedef	int (*notifier_fn_t)(struct notifier_block *nb,
 			unsigned long action, void *data);
 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 14e6f47ee16f..fc46fe3ea259 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -332,9 +332,7 @@ void rcu_init(void);
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
-struct notifier_block;
-int rcu_cpu_notify(struct notifier_block *self,
-		   unsigned long action, void *hcpu);
+void rcu_report_dead(unsigned int cpu);
 
 #ifndef CONFIG_TINY_RCU
 void rcu_end_inkernel_boot(void);
-- 
cgit v1.2.3


From d027d45d8a17a4145eab2d841140e9acbb7feb59 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 7 Jun 2015 15:54:30 +0200
Subject: nohz: New tick dependency mask

The tick dependency is evaluated on every IRQ and context switch. This
consists is a batch of checks which determine whether it is safe to
stop the tick or not. These checks are often split in many details:
posix cpu timers, scheduler, sched clock, perf events.... each of which
are made of smaller details: posix cpu timer involves checking process
wide timers then thread wide timers. Perf involves checking freq events
then more per cpu details.

Checking these informations asynchronously every time we update the full
dynticks state bring avoidable overhead and a messy layout.

Let's introduce instead tick dependency masks: one for system wide
dependency (unstable sched clock, freq based perf events), one for CPU
wide dependency (sched, throttling perf events), and task/signal level
dependencies (posix cpu timers). The subsystems are responsible
for setting and clearing their dependency through a set of APIs that will
take care of concurrent dependency mask modifications and kick targets
to restart the relevant CPU tick whenever needed.

This new dependency engine stays beside the old one until all subsystems
having a tick dependency are converted to it.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/sched.h |  8 +++++
 include/linux/tick.h  | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a10494a94cc3..307e48375f21 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -719,6 +719,10 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
+
 	struct list_head cpu_timers[3];
 
 	struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
 		VTIME_SYS,
 	} vtime_snap_whence;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+	unsigned long tick_dep_mask;
+#endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 97fd4e543846..d60e5df8ec28 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void)
 	tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+enum tick_dep_bits {
+	TICK_DEP_BIT_POSIX_TIMER	= 0,
+	TICK_DEP_BIT_PERF_EVENTS	= 1,
+	TICK_DEP_BIT_SCHED		= 2,
+	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
+};
+
+#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
+#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
+#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
+#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
+
 #ifdef CONFIG_NO_HZ_COMMON
 extern int tick_nohz_enabled;
 extern int tick_nohz_tick_stopped(void);
@@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void)
 	return cpumask_any_and(housekeeping_mask, cpu_online_mask);
 }
 
+extern void tick_nohz_dep_set(enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_task(struct task_struct *tsk,
+				   enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+				     enum tick_dep_bits bit);
+extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit);
+
+/*
+ * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
+ * on top of static keys.
+ */
+static inline void tick_dep_set(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set(bit);
+}
+
+static inline void tick_dep_clear(enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear(bit);
+}
+
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_set_cpu(cpu, bit);
+}
+
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_cpu(cpu))
+		tick_nohz_dep_clear_cpu(cpu, bit);
+}
+
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_task(tsk, bit);
+}
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_task(tsk, bit);
+}
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_set_signal(signal, bit);
+}
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit)
+{
+	if (tick_nohz_full_enabled())
+		tick_nohz_dep_clear_signal(signal, bit);
+}
+
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void tick_nohz_full_kick_all(void);
@@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void)
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
+
+static inline void tick_dep_set(enum tick_dep_bits bit) { }
+static inline void tick_dep_clear(enum tick_dep_bits bit) { }
+static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
+static inline void tick_dep_set_task(struct task_struct *tsk,
+				     enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_task(struct task_struct *tsk,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_set_signal(struct signal_struct *signal,
+				       enum tick_dep_bits bit) { }
+static inline void tick_dep_clear_signal(struct signal_struct *signal,
+					 enum tick_dep_bits bit) { }
+
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
-- 
cgit v1.2.3


From e6e6cc22e067a6f44449aa8fd0328404079c3ba5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 11 Dec 2015 03:27:25 +0100
Subject: nohz: Use enum code for tick stop failure tracing message

It makes nohz tracing more lightweight, standard and easier to parse.

Examples:

       user_loop-2904  [007] d..1   517.701126: tick_stop: success=1 dependency=NONE
       user_loop-2904  [007] dn.1   518.021181: tick_stop: success=0 dependency=SCHED
    posix_timers-6142  [007] d..1  1739.027400: tick_stop: success=0 dependency=POSIX_TIMER
       user_loop-5463  [007] dN.1  1185.931939: tick_stop: success=0 dependency=PERF_EVENTS

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/tick.h         |  1 +
 include/trace/events/timer.h | 36 +++++++++++++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index d60e5df8ec28..96d276a923bf 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -104,6 +104,7 @@ enum tick_dep_bits {
 	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3
 };
 
+#define TICK_DEP_MASK_NONE		0
 #define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
 #define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
 #define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 073b9ac245ba..51440131d337 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
 );
 
 #ifdef CONFIG_NO_HZ_COMMON
+
+#define TICK_DEP_NAMES					\
+		tick_dep_name(NONE)			\
+		tick_dep_name(POSIX_TIMER)		\
+		tick_dep_name(PERF_EVENTS)		\
+		tick_dep_name(SCHED)			\
+		tick_dep_name_end(CLOCK_UNSTABLE)
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+
+TICK_DEP_NAMES
+
+#undef tick_dep_name
+#undef tick_dep_name_end
+
+#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
+
+#define show_tick_dep_name(val)				\
+	__print_symbolic(val, TICK_DEP_NAMES)
+
 TRACE_EVENT(tick_stop,
 
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
 
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
 
 	TP_STRUCT__entry(
 		__field( int ,		success	)
-		__string( msg, 		error_msg )
+		__field( int ,		dependency )
 	),
 
 	TP_fast_assign(
 		__entry->success	= success;
-		__assign_str(msg, error_msg);
+		__entry->dependency	= dependency;
 	),
 
-	TP_printk("success=%s msg=%s",  __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s",  __entry->success, \
+			show_tick_dep_name(__entry->dependency))
 );
 #endif
 
-- 
cgit v1.2.3


From 555e0c1ef7ff49ee5ac3a1eb12de4a2e4722f63d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 16 Jul 2015 17:42:29 +0200
Subject: perf: Migrate perf to use new tick dependency mask model

Instead of providing asynchronous checks for the nohz subsystem to verify
perf event tick dependency, migrate perf to the new mask.

Perf needs the tick for two situations:

1) Freq events. We could set the tick dependency when those are
installed on a CPU context. But setting a global dependency on top of
the global freq events accounting is much easier. If people want that
to be optimized, we can still refine that on the per-CPU tick dependency
level. This patch dooesn't change the current behaviour anyway.

2) Throttled events: this is a per-cpu dependency.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/perf_event.h | 6 ------
 include/linux/tick.h       | 2 --
 2 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b35a61a481fa..d3ff88c13632 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1108,12 +1108,6 @@ static inline void perf_event_task_tick(void)				{ }
 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
 #endif
 
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void)			{ return true; }
-#endif
-
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
 #else
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 96d276a923bf..0e63db4bc662 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -233,7 +233,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal,
 		tick_nohz_dep_clear_signal(signal, bit);
 }
 
-extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
@@ -260,7 +259,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit) { }
 
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
-- 
cgit v1.2.3


From 76d92ac305f23cada3a9b3c48a7ccea5f71019cb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 17 Jul 2015 22:25:49 +0200
Subject: sched: Migrate sched to use new tick dependency mask model

Instead of providing asynchronous checks for the nohz subsystem to verify
sched tick dependency, migrate sched to the new mask.

Everytime a task is enqueued or dequeued, we evaluate the state of the
tick dependency on top of the policy of the tasks in the runqueue, by
order of priority:

SCHED_DEADLINE: Need the tick in order to periodically check for runtime
SCHED_FIFO    : Don't need the tick (no round-robin)
SCHED_RR      : Need the tick if more than 1 task of the same priority
                for round robin (simplified with checking if more than
                one SCHED_RR task no matter what priority).
SCHED_NORMAL  : Need the tick if more than 1 task for round-robin.

We could optimize that further with one flag per sched policy on the tick
dependency mask and perform only the checks relevant to the policy
concerned by an enqueue/dequeue operation.

Since the checks aren't based on the current task anymore, we could get
rid of the task switch hook but it's still needed for posix cpu
timers.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/sched.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 307e48375f21..2b10348806d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2364,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
 #ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
 extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
 #endif
 
 #ifdef CONFIG_SCHED_AUTOGROUP
-- 
cgit v1.2.3


From b78783000d5cb7c5994e6742e1d1ce594bfea15b Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 17 Jul 2015 22:25:49 +0200
Subject: posix-cpu-timers: Migrate to use new tick dependency mask model

Instead of providing asynchronous checks for the nohz subsystem to verify
posix cpu timers tick dependency, migrate the latter to the new mask.

In order to keep track of the running timers and expose the tick
dependency accordingly, we must probe the timers queuing and dequeuing
on threads and process lists.

Unfortunately it implies both task and signal level dependencies. We
should be able to further optimize this and merge all that on the task
level dependency, at the cost of a bit of complexity and may be overhead.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/posix-timers.h | 3 ---
 include/linux/tick.h         | 2 --
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 907f3fd191ac..62d44c176071 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
 void run_posix_cpu_timers(struct task_struct *task);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
-
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
-
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 0e63db4bc662..21f73649a4dc 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -234,7 +234,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal,
 }
 
 extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(void);
 #else
 static inline int housekeeping_any_cpu(void)
@@ -259,7 +258,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit) { }
 
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(void) { }
 #endif
 
-- 
cgit v1.2.3


From bdfc028de1b3cd59490d5413a5c87b0fa50040c2 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 29 Feb 2016 21:17:12 +0200
Subject: net/mlx5e: Fix ethtool RX hash func configuration change

We should modify TIRs explicitly to apply the new RSS configuration.
The light ndo close/open calls do not "refresh" them.

Fixes: 2d75b2bc8a8c ('net/mlx5e: Add ethtool RSS configuration options')
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 51f1e540fc2b..58eef02edc7e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4245,7 +4245,9 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
 
 	u8         reserved_at_20[0x1b];
 	u8         self_lb_en[0x1];
-	u8         reserved_at_3c[0x3];
+	u8         reserved_at_3c[0x1];
+	u8         hash[0x1];
+	u8         reserved_at_3e[0x1];
 	u8         lro[0x1];
 };
 
-- 
cgit v1.2.3


From 9da0f49c8767cc0ef6101cb21156cf4380ed50dd Mon Sep 17 00:00:00 2001
From: "Christopher S. Hall" <christopher.s.hall@intel.com>
Date: Mon, 22 Feb 2016 03:15:20 -0800
Subject: time: Add timekeeping snapshot code capturing system time and counter

In the current timekeeping code there isn't any interface to
atomically capture the current relationship between the system counter
and system time. ktime_get_snapshot() returns this triple (counter,
monotonic raw, realtime) in the system_time_snapshot struct.

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
[jstultz: Moved structure definitions around to clean things up,
 fixed cycles_t/cycle_t confusion.]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ec89d846324c..7817591af46f 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -266,6 +266,24 @@ extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
 extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw,
 				        struct timespec64 *ts_real);
 
+/*
+ * struct system_time_snapshot - simultaneous raw/real time capture with
+ *	counter value
+ * @cycles:	Clocksource counter value to produce the system times
+ * @real:	Realtime system time
+ * @raw:	Monotonic raw system time
+ */
+struct system_time_snapshot {
+	cycle_t		cycles;
+	ktime_t		real;
+	ktime_t		raw;
+};
+
+/*
+ * Simultaneously snapshot realtime and monotonic raw clocks
+ */
+extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
+
 /*
  * Persistent clock related interfaces
  */
-- 
cgit v1.2.3


From ba26621e63ce6dc481d90ab9f6902e058d4ea39a Mon Sep 17 00:00:00 2001
From: "Christopher S. Hall" <christopher.s.hall@intel.com>
Date: Mon, 22 Feb 2016 03:15:21 -0800
Subject: time: Remove duplicated code in ktime_get_raw_and_real()

The code in ktime_get_snapshot() is a superset of the code in
ktime_get_raw_and_real() code. Further, ktime_get_raw_and_real() is
called only by the PPS code, pps_get_ts(). Consolidate the
pps_get_ts() code into a single function calling ktime_get_snapshot()
and eliminate ktime_get_raw_and_real(). A side effect of this is that
the raw and real results of pps_get_ts() correspond to exactly the
same clock cycle. Previously these values represented separate reads
of the system clock.

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/pps_kernel.h | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 54bf1484d41f..35ac903956c7 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -111,22 +111,17 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt,
 	kt->nsec = ts.tv_nsec;
 }
 
-#ifdef CONFIG_NTP_PPS
-
 static inline void pps_get_ts(struct pps_event_time *ts)
 {
-	ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real);
-}
+	struct system_time_snapshot snap;
 
-#else /* CONFIG_NTP_PPS */
-
-static inline void pps_get_ts(struct pps_event_time *ts)
-{
-	ktime_get_real_ts64(&ts->ts_real);
+	ktime_get_snapshot(&snap);
+	ts->ts_real = ktime_to_timespec64(snap.real);
+#ifdef CONFIG_NTP_PPS
+	ts->ts_raw = ktime_to_timespec64(snap.raw);
+#endif
 }
 
-#endif /* CONFIG_NTP_PPS */
-
 /* Subtract known time delay from PPS event time(s) */
 static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta)
 {
-- 
cgit v1.2.3


From 8006c24595cab106bcb9da12d35e32e14ff492df Mon Sep 17 00:00:00 2001
From: "Christopher S. Hall" <christopher.s.hall@intel.com>
Date: Mon, 22 Feb 2016 03:15:22 -0800
Subject: time: Add driver cross timestamp interface for higher precision time
 synchronization

ACKNOWLEDGMENT: cross timestamp code was developed by Thomas Gleixner
<tglx@linutronix.de>. It has changed considerably and any mistakes are
mine.

The precision with which events on multiple networked systems can be
synchronized using, as an example, PTP (IEEE 1588, 802.1AS) is limited
by the precision of the cross timestamps between the system clock and
the device (timestamp) clock. Precision here is the degree of
simultaneity when capturing the cross timestamp.

Currently the PTP cross timestamp is captured in software using the
PTP device driver ioctl PTP_SYS_OFFSET. Reads of the device clock are
interleaved with reads of the realtime clock. At best, the precision
of this cross timestamp is on the order of several microseconds due to
software latencies. Sub-microsecond precision is required for
industrial control and some media applications. To achieve this level
of precision hardware supported cross timestamping is needed.

The function get_device_system_crosstimestamp() allows device drivers
to return a cross timestamp with system time properly scaled to
nanoseconds.  The realtime value is needed to discipline that clock
using PTP and the monotonic raw value is used for applications that
don't require a "real" time, but need an unadjusted clock time.  The
get_device_system_crosstimestamp() code calls back into the driver to
ensure that the system counter is within the current timekeeping
update interval.

Modern Intel hardware provides an Always Running Timer (ART) which is
exactly related to TSC through a known frequency ratio. The ART is
routed to devices on the system and is used to precisely and
simultaneously capture the device clock with the ART.

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
[jstultz: Reworked to remove extra structures and simplify calling]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7817591af46f..4a2ca65fc778 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -279,6 +279,41 @@ struct system_time_snapshot {
 	ktime_t		raw;
 };
 
+/*
+ * struct system_device_crosststamp - system/device cross-timestamp
+ *	(syncronized capture)
+ * @device:		Device time
+ * @sys_realtime:	Realtime simultaneous with device time
+ * @sys_monoraw:	Monotonic raw simultaneous with device time
+ */
+struct system_device_crosststamp {
+	ktime_t device;
+	ktime_t sys_realtime;
+	ktime_t sys_monoraw;
+};
+
+/*
+ * struct system_counterval_t - system counter value with the pointer to the
+ *	corresponding clocksource
+ * @cycles:	System counter value
+ * @cs:		Clocksource corresponding to system counter value. Used by
+ *	timekeeping code to verify comparibility of two cycle values
+ */
+struct system_counterval_t {
+	cycle_t			cycles;
+	struct clocksource	*cs;
+};
+
+/*
+ * Get cross timestamp between system clock and device clock
+ */
+extern int get_device_system_crosststamp(
+			int (*get_time_fn)(ktime_t *device_time,
+				struct system_counterval_t *system_counterval,
+				void *ctx),
+			void *ctx,
+			struct system_device_crosststamp *xtstamp);
+
 /*
  * Simultaneously snapshot realtime and monotonic raw clocks
  */
-- 
cgit v1.2.3


From 2c756feb18d9ec258dbb3a3d11c47e28820690d7 Mon Sep 17 00:00:00 2001
From: "Christopher S. Hall" <christopher.s.hall@intel.com>
Date: Mon, 22 Feb 2016 03:15:23 -0800
Subject: time: Add history to cross timestamp interface supporting slower
 devices

Another representative use case of time sync and the correlated
clocksource (in addition to PTP noted above) is PTP synchronized
audio.

In a streaming application, as an example, samples will be sent and/or
received by multiple devices with a presentation time that is in terms
of the PTP master clock. Synchronizing the audio output on these
devices requires correlating the audio clock with the PTP master
clock. The more precise this correlation is, the better the audio
quality (i.e. out of sync audio sounds bad).

From an application standpoint, to correlate the PTP master clock with
the audio device clock, the system clock is used as a intermediate
timebase. The transforms such an application would perform are:

    System Clock <-> Audio clock
    System Clock <-> Network Device Clock [<-> PTP Master Clock]

Modern Intel platforms can perform a more accurate cross timestamp in
hardware (ART,audio device clock).  The audio driver requires
ART->system time transforms -- the same as required for the network
driver. These platforms offload audio processing (including
cross-timestamps) to a DSP which to ensure uninterrupted audio
processing, communicates and response to the host only once every
millsecond. As a result is takes up to a millisecond for the DSP to
receive a request, the request is processed by the DSP, the audio
output hardware is polled for completion, the result is copied into
shared memory, and the host is notified. All of these operation occur
on a millisecond cadence.  This transaction requires about 2 ms, but
under heavier workloads it may take up to 4 ms.

Adding a history allows these slow devices the option of providing an
ART value outside of the current interval. In this case, the callback
provided is an accessor function for the previously obtained counter
value. If get_system_device_crosststamp() receives a counter value
previous to cycle_last, it consults the history provided as an
argument in history_ref and interpolates the realtime and monotonic
raw system time using the provided counter value. If there are any
clock discontinuities, e.g. from calling settimeofday(), the monotonic
raw time is interpolated in the usual way, but the realtime clock time
is adjusted by scaling the monotonic raw adjustment.

When an accessor function is used a history argument *must* be
provided. The history is initialized using ktime_get_snapshot() and
must be called before the counter values are read.

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
[jstultz: Fixed up cycles_t/cycle_t type confusion]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeper_internal.h | 2 ++
 include/linux/timekeeping.h         | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 25247220b4b7..e88005459035 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -50,6 +50,7 @@ struct tk_read_base {
  * @offs_tai:		Offset clock monotonic -> clock tai
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_time:		Monotonic raw base time in timespec64 format
  * @cycle_interval:	Number of clock cycles in one NTP interval
@@ -91,6 +92,7 @@ struct timekeeper {
 	ktime_t			offs_tai;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
+	u8			cs_was_changed_seq;
 	ktime_t			next_leap_ktime;
 	struct timespec64	raw_time;
 
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 4a2ca65fc778..96f37bee3bc1 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -272,11 +272,15 @@ extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw,
  * @cycles:	Clocksource counter value to produce the system times
  * @real:	Realtime system time
  * @raw:	Monotonic raw system time
+ * @clock_was_set_seq:	The sequence number of clock was set events
+ * @cs_was_changed_seq:	The sequence number of clocksource change events
  */
 struct system_time_snapshot {
 	cycle_t		cycles;
 	ktime_t		real;
 	ktime_t		raw;
+	unsigned int	clock_was_set_seq;
+	u8		cs_was_changed_seq;
 };
 
 /*
@@ -312,6 +316,7 @@ extern int get_device_system_crosststamp(
 				struct system_counterval_t *system_counterval,
 				void *ctx),
 			void *ctx,
+			struct system_time_snapshot *history,
 			struct system_device_crosststamp *xtstamp);
 
 /*
-- 
cgit v1.2.3


From 58402b6e98f9059420e64b296db6c4cb6f6c1117 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 29 Feb 2016 09:02:47 +0100
Subject: [media] media.h: use hex values for range offsets,  move connectors
 base up.

Make the base offset hexadecimal to simplify debugging since the base
addresses are hex too.

The offsets for connectors is also changed to start after the 'reserved'
range 0x10000-0x2ffff.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index f0328524be6e..13e19a18f97f 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -66,25 +66,25 @@ struct media_device_info {
 /*
  * DVB entities
  */
-#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 1)
-#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 2)
-#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 3)
-#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 4)
+#define MEDIA_ENT_F_DTV_DEMOD		(MEDIA_ENT_F_BASE + 0x00001)
+#define MEDIA_ENT_F_TS_DEMUX		(MEDIA_ENT_F_BASE + 0x00002)
+#define MEDIA_ENT_F_DTV_CA		(MEDIA_ENT_F_BASE + 0x00003)
+#define MEDIA_ENT_F_DTV_NET_DECAP	(MEDIA_ENT_F_BASE + 0x00004)
 
 /*
  * I/O entities
  */
-#define MEDIA_ENT_F_IO_DTV		(MEDIA_ENT_F_BASE + 1001)
-#define MEDIA_ENT_F_IO_VBI		(MEDIA_ENT_F_BASE + 1002)
-#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 1003)
+#define MEDIA_ENT_F_IO_DTV		(MEDIA_ENT_F_BASE + 0x01001)
+#define MEDIA_ENT_F_IO_VBI		(MEDIA_ENT_F_BASE + 0x01002)
+#define MEDIA_ENT_F_IO_SWRADIO		(MEDIA_ENT_F_BASE + 0x01003)
 
 /*
  * Connectors
  */
 /* It is a responsibility of the entity drivers to add connectors and links */
-#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 10001)
-#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 10002)
-#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 10003)
+#define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 0x30001)
+#define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 0x30002)
+#define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 0x30003)
 
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
-- 
cgit v1.2.3


From 82e88ff1ea948d83125a8aaa7c9809f03ccc500f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 3 Mar 2016 11:11:12 +0100
Subject: hrtimer: Revert CLOCK_MONOTONIC_RAW support

Revert commits:
a6e707ddbdf1: KVM: arm/arm64: timer: Switch to CLOCK_MONOTONIC_RAW
9006a01829a5: hrtimer: Catch illegal clockids
9c808765e88e: hrtimer: Add support for CLOCK_MONOTONIC_RAW

Marc found out, that there are fundamental issues with that patch series
because __hrtimer_get_next_event() and hrtimer_forward() need support for
CLOCK_MONOTONIC_RAW. Nothing which is easily fixed, so revert the whole lot.

Reported-by: Marc Zyngier <marc.zyngier@arm.com>
Link: http://lkml.kernel.org/r/56D6CEF0.8060607@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index a6d64af5e73f..76dd4f0da5ca 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -151,7 +151,6 @@ enum  hrtimer_base_type {
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_BOOTTIME,
 	HRTIMER_BASE_TAI,
-	HRTIMER_BASE_MONOTONIC_RAW,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
-- 
cgit v1.2.3


From 93125094c07d8c9ec25dff5869f191b33eb9dd6e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 3 Mar 2016 14:52:51 -0300
Subject: [media] media.h: postpone connectors entities

The representation of external connections got some heated
discussions recently. As we're too close to the merge window,
let's not set those entities into a stone.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 13e19a18f97f..323f1af35062 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -82,10 +82,18 @@ struct media_device_info {
  * Connectors
  */
 /* It is a responsibility of the entity drivers to add connectors and links */
+#ifdef __KERNEL__
+	/*
+	 * For now, it should not be used in userspace, as some
+	 * definitions may change
+	 */
+
 #define MEDIA_ENT_F_CONN_RF		(MEDIA_ENT_F_BASE + 0x30001)
 #define MEDIA_ENT_F_CONN_SVIDEO		(MEDIA_ENT_F_BASE + 0x30002)
 #define MEDIA_ENT_F_CONN_COMPOSITE	(MEDIA_ENT_F_BASE + 0x30003)
 
+#endif
+
 /*
  * Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
  * MEDIA_ENT_F_OLD_SUBDEV_BASE are kept to keep backward compatibility
-- 
cgit v1.2.3


From 88f8b1bb41c6208f81b6a480244533ded7b59493 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@linaro.org>
Date: Mon, 29 Feb 2016 17:18:22 +0100
Subject: stmmac: Fix 'eth0: No PHY found' regression

This patch manages the case when you have an Ethernet MAC with
a "fixed link", and not connected to a normal MDIO-managed PHY device.

The test of phy_bus_name was not helpful because it was never affected
and replaced by the mdio test node.

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@linaro.org>
Acked-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index eead8ab93c0a..881a79d52467 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -100,6 +100,7 @@ struct plat_stmmacenet_data {
 	int interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
+	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	int clk_csr;
 	int has_gmac;
-- 
cgit v1.2.3


From 1837b2e2bcd23137766555a63867e649c0b637f0 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Mon, 29 Feb 2016 15:03:33 -0800
Subject: mld, igmp: Fix reserved tailroom calculation

The current reserved_tailroom calculation fails to take hlen and tlen into
account.

skb:
[__hlen__|__data____________|__tlen___|__extra__]
^                                               ^
head                                            skb_end_offset

In this representation, hlen + data + tlen is the size passed to alloc_skb.
"extra" is the extra space made available in __alloc_skb because of
rounding up by kmalloc. We can reorder the representation like so:

[__hlen__|__data____________|__extra__|__tlen___]
^                                               ^
head                                            skb_end_offset

The maximum space available for ip headers and payload without
fragmentation is min(mtu, data + extra). Therefore,
reserved_tailroom
= data + extra + tlen - min(mtu, data + extra)
= skb_end_offset - hlen - min(mtu, skb_end_offset - hlen - tlen)
= skb_tailroom - min(mtu, skb_tailroom - tlen) ; after skb_reserve(hlen)

Compare the second line to the current expression:
reserved_tailroom = skb_end_offset - min(mtu, skb_end_offset)
and we can see that hlen and tlen are not taken into account.

The min() in the third line can be expanded into:
if mtu < skb_tailroom - tlen:
	reserved_tailroom = skb_tailroom - mtu
else:
	reserved_tailroom = tlen

Depending on hlen, tlen, mtu and the number of multicast address records,
the current code may output skbs that have less tailroom than
dev->needed_tailroom or it may output more skbs than needed because not all
space available is used.

Fixes: 4c672e4b ("ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4ce9ff7086f4..d3fcd4591ce4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1985,6 +1985,30 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+/**
+ *	skb_tailroom_reserve - adjust reserved_tailroom
+ *	@skb: buffer to alter
+ *	@mtu: maximum amount of headlen permitted
+ *	@needed_tailroom: minimum amount of reserved_tailroom
+ *
+ *	Set reserved_tailroom so that headlen can be as large as possible but
+ *	not larger than mtu and tailroom cannot be smaller than
+ *	needed_tailroom.
+ *	The required headroom should already have been reserved before using
+ *	this function.
+ */
+static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
+					unsigned int needed_tailroom)
+{
+	SKB_LINEAR_ASSERT(skb);
+	if (mtu < skb_tailroom(skb) - needed_tailroom)
+		/* use at most mtu */
+		skb->reserved_tailroom = skb_tailroom(skb) - mtu;
+	else
+		/* use up to all available space */
+		skb->reserved_tailroom = needed_tailroom;
+}
+
 #define ENCAP_TYPE_ETHER	0
 #define ENCAP_TYPE_IPPROTO	1
 
-- 
cgit v1.2.3


From fbe093ac9f0201939279cdfe8b0fce20ce5ef7a9 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Thu, 3 Mar 2016 14:20:14 -0300
Subject: [media] media: Sanitise the reserved fields of the G_TOPOLOGY IOCTL
 arguments

The argument structs are used in arrays for G_TOPOLOGY IOCTL. The
arguments themselves do not need to be aligned to a power of two, but
aligning them up to the largest basic type alignment (u64) on common ABIs
is a good thing to do.

The patch changes the size of the reserved fields to 5 or 6 u32's and
aligns the size of the struct to 8 bytes so we do no longer depend on the
compiler to perform the alignment.

While at it, add __attribute__ ((packed)) to these structs as well.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 323f1af35062..625b38f65764 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -297,14 +297,14 @@ struct media_v2_entity {
 	__u32 id;
 	char name[64];		/* FIXME: move to a property? (RFC says so) */
 	__u32 function;		/* Main function of the entity */
-	__u16 reserved[12];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 /* Should match the specific fields at media_intf_devnode */
 struct media_v2_intf_devnode {
 	__u32 major;
 	__u32 minor;
-};
+} __attribute__ ((packed));
 
 struct media_v2_interface {
 	__u32 id;
@@ -316,22 +316,22 @@ struct media_v2_interface {
 		struct media_v2_intf_devnode devnode;
 		__u32 raw[16];
 	};
-};
+} __attribute__ ((packed));
 
 struct media_v2_pad {
 	__u32 id;
 	__u32 entity_id;
 	__u32 flags;
-	__u16 reserved[9];
-};
+	__u32 reserved[5];
+} __attribute__ ((packed));
 
 struct media_v2_link {
 	__u32 id;
 	__u32 source_id;
 	__u32 sink_id;
 	__u32 flags;
-	__u32 reserved[5];
-};
+	__u32 reserved[6];
+} __attribute__ ((packed));
 
 struct media_v2_topology {
 	__u64 topology_version;
@@ -351,7 +351,7 @@ struct media_v2_topology {
 	__u32 num_links;
 	__u32 reserved4;
 	__u64 ptr_links;
-};
+} __attribute__ ((packed));
 
 static inline void __user *media_get_uptr(__u64 arg)
 {
-- 
cgit v1.2.3


From 7bcd79ac50d9d83350a835bdb91c04ac9e098412 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 26 Feb 2016 23:40:50 +0800
Subject: block: bio: introduce helpers to get the 1st and last bvec

The bio passed to bio_will_gap() may be fast cloned from upper
layer(dm, md, bcache, fs, ...), or from bio splitting in block
core.

Unfortunately bio_will_gap() just figures out the last bvec via
'bi_io_vec[prev->bi_vcnt - 1]' directly, and this way is obviously
wrong.

This patch introduces two helpers for getting the first and last
bvec of one bio for fixing the issue.

Cc: stable@vger.kernel.org
Reported-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e6816cbb..cb6888824108 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
 	bio->bi_flags &= ~(1U << bit);
 }
 
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	*bv = bio_iovec(bio);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	struct bvec_iter iter = bio->bi_iter;
+	int idx;
+
+	if (!bio_flagged(bio, BIO_CLONED)) {
+		*bv = bio->bi_io_vec[bio->bi_vcnt - 1];
+		return;
+	}
+
+	if (unlikely(!bio_multiple_segments(bio))) {
+		*bv = bio_iovec(bio);
+		return;
+	}
+
+	bio_advance_iter(bio, &iter, iter.bi_size);
+
+	if (!iter.bi_bvec_done)
+		idx = iter.bi_idx - 1;
+	else	/* in the middle of bvec */
+		idx = iter.bi_idx;
+
+	*bv = bio->bi_io_vec[idx];
+
+	/*
+	 * iter.bi_bvec_done records actual length of the last bvec
+	 * if this bio ends in the middle of one io vector
+	 */
+	if (iter.bi_bvec_done)
+		bv->bv_len = iter.bi_bvec_done;
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
-- 
cgit v1.2.3


From e0af29171aa8912e1ca95023b75ef336cd70d661 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 26 Feb 2016 23:40:51 +0800
Subject: block: check virt boundary in bio_will_gap()

In the following patch, the way for figuring out
the last bvec will be changed with a bit cost introduced,
so return immediately if the queue doesn't have virt
boundary limit. Actually most of devices have not
this limit.

Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4571ef1a12a9..cd06a41581b3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p)
 	page_cache_release(p.v);
 }
 
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+				struct bio_vec *bprv, unsigned int offset)
+{
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
 /*
  * Check if adding a bio_vec after bprv with offset would create a gap in
  * the SG list. Most drivers don't care about this, but some do.
@@ -1381,18 +1388,17 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 {
 	if (!queue_virt_boundary(q))
 		return false;
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
-	if (!bio_has_data(prev))
+	if (!bio_has_data(prev) || !queue_virt_boundary(q))
 		return false;
 
-	return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
-				next->bi_io_vec[0].bv_offset);
+	return __bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
+				  next->bi_io_vec[0].bv_offset);
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
-- 
cgit v1.2.3


From 25e71a99f10e444cd00bb2ebccb11e1c9fb672b1 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 26 Feb 2016 23:40:52 +0800
Subject: block: get the 1st and last bvec via helpers

This patch applies the two introduced helpers to
figure out the 1st and last bvec, and fixes the
original way after bio splitting.

Cc: stable@vger.kernel.org
Reported-by: Sagi Grimberg <sagig@dev.mellanox.co.il>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cd06a41581b3..d7f6bca707ef 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1394,11 +1394,16 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
-	if (!bio_has_data(prev) || !queue_virt_boundary(q))
-		return false;
+	if (bio_has_data(prev) && queue_virt_boundary(q)) {
+		struct bio_vec pb, nb;
+
+		bio_get_last_bvec(prev, &pb);
+		bio_get_first_bvec(next, &nb);
 
-	return __bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
-				  next->bi_io_vec[0].bv_offset);
+		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+	}
+
+	return false;
 }
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
-- 
cgit v1.2.3


From a1a0e23e49037c23ea84bc8cc146a03584d13577 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 29 Feb 2016 18:28:53 -0500
Subject: writeback: flush inode cgroup wb switches instead of pinning
 super_block

If cgroup writeback is in use, inodes can be scheduled for
asynchronous wb switching.  Before 5ff8eaac1636 ("writeback: keep
superblock pinned during cgroup writeback association switches"), this
could race with umount leading to super_block being destroyed while
inodes are pinned for wb switching.  5ff8eaac1636 fixed it by bumping
s_active while wb switches are in flight; however, this allowed
in-flight wb switches to make umounts asynchronous when the userland
expected synchronosity - e.g. fsck immediately following umount may
fail because the device is still busy.

This patch removes the problematic super_block pinning and instead
makes generic_shutdown_super() flush in-flight wb switches.  wb
switches are now executed on a dedicated isw_wq so that they can be
flushed and isw_nr_in_flight keeps track of the number of in-flight wb
switches so that flushing can be avoided in most cases.

v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE
    check in inode_switch_wbs() as Jan an Al suggested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Tahsin Erdogan <tahsin@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com
Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches")
Cc: stable@vger.kernel.org #v4.5
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/writeback.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c945e571..d0b5ca5d4e08 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_io(struct writeback_control *wbc, struct page *page,
 		    size_t bytes);
+void cgroup_writeback_umount(void);
 
 /**
  * inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc,
 {
 }
 
+static inline void cgroup_writeback_umount(void)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 /*
-- 
cgit v1.2.3


From f21018427cb007a0894c36ad702990ab639cbbb4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Mar 2016 14:43:45 -0700
Subject: block: fix blk_rq_get_max_sectors for driver private requests

Driver private request types should not get the artifical cap for the
FS requests.  This is important to use the full device capabilities
for internal command or NVMe pass through commands.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Jeff Lien <Jeff.Lien@hgst.com>
Tested-by: Jeff Lien <Jeff.Lien@hgst.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>

Updated by me to use an explicit check for the one command type that
does support extended checking, instead of relying on the ordering
of the enum command values - as suggested by Keith.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d7f6bca707ef..413c84fbc4ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
-- 
cgit v1.2.3


From 719f1aa4a67199a3c4c68a03f94e5ec44d9d5f82 Mon Sep 17 00:00:00 2001
From: "Christopher S. Hall" <christopher.s.hall@intel.com>
Date: Mon, 22 Feb 2016 03:15:25 -0800
Subject: ptp: Add PTP_SYS_OFFSET_PRECISE for driver crosstimestamping

Currently, network /system cross-timestamping is performed in the
PTP_SYS_OFFSET ioctl. The PTP clock driver reads gettimeofday() and
the gettime64() callback provided by the driver. The cross-timestamp
is best effort where the latency between the capture of system time
(getnstimeofday()) and the device time (driver callback) may be
significant.

The getcrosststamp() callback and corresponding PTP_SYS_OFFSET_PRECISE
ioctl allows the driver to perform this device/system correlation when
for example cross timestamp hardware is available. Modern Intel
systems can do this for onboard Ethernet controllers using the ART
counter. There is virtually zero latency between captures of the ART
and network device clock.

The capabilities ioctl (PTP_CLOCK_GETCAPS), is augmented allowing
applications to query whether or not drivers implement the
getcrosststamp callback, providing more precise cross timestamping.

Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: kevin.b.stanton@intel.com
Cc: kevin.j.clarke@intel.com
Cc: hpa@zytor.com
Cc: jeffrey.t.kirsher@intel.com
Cc: netdev@vger.kernel.org
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Christopher S. Hall <christopher.s.hall@intel.com>
[jstultz: Commit subject tweaks]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/ptp_clock_kernel.h |  8 ++++++++
 include/uapi/linux/ptp_clock.h   | 13 ++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index b8b73066d137..6b15e168148a 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -38,6 +38,7 @@ struct ptp_clock_request {
 	};
 };
 
+struct system_device_crosststamp;
 /**
  * struct ptp_clock_info - decribes a PTP hardware clock
  *
@@ -67,6 +68,11 @@ struct ptp_clock_request {
  * @gettime64:  Reads the current time from the hardware clock.
  *              parameter ts: Holds the result.
  *
+ * @getcrosststamp:  Reads the current time from the hardware clock and
+ *                   system clock simultaneously.
+ *                   parameter cts: Contains timestamp (device,system) pair,
+ *                   where system time is realtime and monotonic.
+ *
  * @settime64:  Set the current time on the hardware clock.
  *              parameter ts: Time value to set.
  *
@@ -105,6 +111,8 @@ struct ptp_clock_info {
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
+	int (*getcrosststamp)(struct ptp_clock_info *ptp,
+			      struct system_device_crosststamp *cts);
 	int (*settime64)(struct ptp_clock_info *p, const struct timespec64 *ts);
 	int (*enable)(struct ptp_clock_info *ptp,
 		      struct ptp_clock_request *request, int on);
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index f0b7bfe5da92..ac6dded80ffa 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -51,7 +51,9 @@ struct ptp_clock_caps {
 	int n_per_out; /* Number of programmable periodic signals. */
 	int pps;       /* Whether the clock supports a PPS callback. */
 	int n_pins;    /* Number of input/output pins. */
-	int rsv[14];   /* Reserved for future use. */
+	/* Whether the clock supports precise system-device cross timestamps */
+	int cross_timestamping;
+	int rsv[13];   /* Reserved for future use. */
 };
 
 struct ptp_extts_request {
@@ -81,6 +83,13 @@ struct ptp_sys_offset {
 	struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
 };
 
+struct ptp_sys_offset_precise {
+	struct ptp_clock_time device;
+	struct ptp_clock_time sys_realtime;
+	struct ptp_clock_time sys_monoraw;
+	unsigned int rsv[4];    /* Reserved for future use. */
+};
+
 enum ptp_pin_function {
 	PTP_PF_NONE,
 	PTP_PF_EXTTS,
@@ -124,6 +133,8 @@ struct ptp_pin_desc {
 #define PTP_SYS_OFFSET     _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset)
 #define PTP_PIN_GETFUNC    _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc)
 #define PTP_PIN_SETFUNC    _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc)
+#define PTP_SYS_OFFSET_PRECISE \
+	_IOWR(PTP_CLK_MAGIC, 8, struct ptp_sys_offset_precise)
 
 struct ptp_extts_event {
 	struct ptp_clock_time t; /* Time event occured. */
-- 
cgit v1.2.3


From e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 3 Mar 2016 17:18:20 -0500
Subject: tracing: Do not have 'comm' filter override event 'comm' field

Commit 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and
process names" added a 'comm' filter that will filter events based on the
current tasks struct 'comm'. But this now hides the ability to filter events
that have a 'comm' field too. For example, sched_migrate_task trace event.
That has a 'comm' field of the task to be migrated.

 echo 'comm == "bash"' > events/sched_migrate_task/filter

will now filter all sched_migrate_task events for tasks named "bash" that
migrates other tasks (in interrupt context), instead of seeing when "bash"
itself gets migrated.

This fix requires a couple of changes.

1) Change the look up order for filter predicates to look at the events
   fields before looking at the generic filters.

2) Instead of basing the filter function off of the "comm" name, have the
   generic "comm" filter have its own filter_type (FILTER_COMM). Test
   against the type instead of the name to assign the filter function.

3) Add a new "COMM" filter that works just like "comm" but will filter based
   on the current task, even if the trace event contains a "comm" field.

Do the same for "cpu" field, adding a FILTER_CPU and a filter "CPU".

Cc: stable@vger.kernel.org # v4.3+
Fixes: 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and process names"
Reported-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 429fdfc3baf5..925730bc9fc1 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -568,6 +568,8 @@ enum {
 	FILTER_DYN_STRING,
 	FILTER_PTR_STRING,
 	FILTER_TRACE_FN,
+	FILTER_COMM,
+	FILTER_CPU,
 };
 
 extern int trace_event_raw_init(struct trace_event_call *call);
-- 
cgit v1.2.3


From 5ea5c5e0a7f70b256417d3b6e36bd9851504babd Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Sun, 14 Feb 2016 18:06:41 +0800
Subject: ceph: initial CEPH_FEATURE_FS_FILE_LAYOUT_V2 support

Add support for the format change of MClientReply/MclientCaps.
Also add code that denies access to inodes with pool_ns layouts.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index c1ef6f14e7be..15151f3c4120 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -75,6 +75,7 @@
 #define CEPH_FEATURE_CRUSH_TUNABLES5	(1ULL<<58) /* chooseleaf stable mode */
 // duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
 #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING   (1ULL<<58) /* New, v7 encoding */
+#define CEPH_FEATURE_FS_FILE_LAYOUT_V2       (1ULL<<58) /* file_layout_t */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
-- 
cgit v1.2.3


From 72f9f3fdc928dc3ecd223e801b32d930b662b6ed Mon Sep 17 00:00:00 2001
From: Luca Abeni <luca.abeni@unitn.it>
Date: Mon, 7 Mar 2016 12:27:04 +0100
Subject: sched/deadline: Remove dl_new from struct sched_dl_entity

The dl_new field of struct sched_dl_entity is currently used to
identify new deadline tasks, so that their deadline and runtime
can be properly initialised.

However, these tasks can be easily identified by checking if
their deadline is smaller than the current time when they switch
to SCHED_DEADLINE. So, dl_new can be removed by introducing this
check in switched_to_dl(); this allows to simplify the
SCHED_DEADLINE code.

Signed-off-by: Luca Abeni <luca.abeni@unitn.it>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1457350024-7825-2-git-send-email-luca.abeni@unitn.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9519b66fc21f..838a89a78332 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1333,10 +1333,6 @@ struct sched_dl_entity {
 	 * task has to wait for a replenishment to be performed at the
 	 * next firing of dl_timer.
 	 *
-	 * @dl_new tells if a new instance arrived. If so we must
-	 * start executing it with full runtime and reset its absolute
-	 * deadline;
-	 *
 	 * @dl_boosted tells if we are boosted due to DI. If so we are
 	 * outside bandwidth enforcement mechanism (but only until we
 	 * exit the critical section);
@@ -1344,7 +1340,7 @@ struct sched_dl_entity {
 	 * @dl_yielded tells if task gave up the cpu before consuming
 	 * all its available runtime during the last job.
 	 */
-	int dl_throttled, dl_new, dl_boosted, dl_yielded;
+	int dl_throttled, dl_boosted, dl_yielded;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
-- 
cgit v1.2.3


From f6e45661f9be546811b62b2b01f32f4bf0c436c0 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Fri, 22 Jan 2016 18:34:22 -0800
Subject: dma, mm/pat: Rename dma_*_writecombine() to dma_*_wc()

Rename dma_*_writecombine() to dma_*_wc(), so that the naming
is coherent across the various write-combining APIs. Keep the
old names for compatibility for a while, these can be removed
at a later time. A guard is left to enable backporting of the
rename, and later remove of the old mapping defines seemlessly.

Build tested successfully with allmodconfig.

The following Coccinelle SmPL patch was used for this simple
transformation:

@ rename_dma_alloc_writecombine @
expression dev, size, dma_addr, gfp;
@@

-dma_alloc_writecombine(dev, size, dma_addr, gfp)
+dma_alloc_wc(dev, size, dma_addr, gfp)

@ rename_dma_free_writecombine @
expression dev, size, cpu_addr, dma_addr;
@@

-dma_free_writecombine(dev, size, cpu_addr, dma_addr)
+dma_free_wc(dev, size, cpu_addr, dma_addr)

@ rename_dma_mmap_writecombine @
expression dev, vma, cpu_addr, dma_addr, size;
@@

-dma_mmap_writecombine(dev, vma, cpu_addr, dma_addr, size)
+dma_mmap_wc(dev, vma, cpu_addr, dma_addr, size)

We also keep the old names as compatibility helpers, and
guard against their definition to make backporting easier.

Generated-by: Coccinelle SmPL
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: airlied@linux.ie
Cc: akpm@linux-foundation.org
Cc: benh@kernel.crashing.org
Cc: bhelgaas@google.com
Cc: bp@suse.de
Cc: dan.j.williams@intel.com
Cc: daniel.vetter@ffwll.ch
Cc: dhowells@redhat.com
Cc: julia.lawall@lip6.fr
Cc: konrad.wilk@oracle.com
Cc: linux-fbdev@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Cc: luto@amacapital.net
Cc: mst@redhat.com
Cc: tomi.valkeinen@ti.com
Cc: toshi.kani@hp.com
Cc: vinod.koul@intel.com
Cc: xen-devel@lists.xensource.com
Link: http://lkml.kernel.org/r/1453516462-4844-1-git-send-email-mcgrof@do-not-panic.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/dma-mapping.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 75857cda38e9..471e064af29f 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -641,31 +641,40 @@ static inline void dmam_release_declared_memory(struct device *dev)
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
-static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
-					   dma_addr_t *dma_addr, gfp_t gfp)
+static inline void *dma_alloc_wc(struct device *dev, size_t size,
+				 dma_addr_t *dma_addr, gfp_t gfp)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs);
 }
+#ifndef dma_alloc_writecombine
+#define dma_alloc_writecombine dma_alloc_wc
+#endif
 
-static inline void dma_free_writecombine(struct device *dev, size_t size,
-					 void *cpu_addr, dma_addr_t dma_addr)
+static inline void dma_free_wc(struct device *dev, size_t size,
+			       void *cpu_addr, dma_addr_t dma_addr)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs);
 }
+#ifndef dma_free_writecombine
+#define dma_free_writecombine dma_free_wc
+#endif
 
-static inline int dma_mmap_writecombine(struct device *dev,
-					struct vm_area_struct *vma,
-					void *cpu_addr, dma_addr_t dma_addr,
-					size_t size)
+static inline int dma_mmap_wc(struct device *dev,
+			      struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr,
+			      size_t size)
 {
 	DEFINE_DMA_ATTRS(attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
 	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
 }
+#ifndef dma_mmap_writecombine
+#define dma_mmap_writecombine dma_mmap_wc
+#endif
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
-- 
cgit v1.2.3


From dc17147de328a74bbdee67c1bf37d2f1992de756 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 9 Mar 2016 11:58:41 -0500
Subject: tracing: Fix check for cpu online when event is disabled

Commit f37755490fe9b ("tracepoints: Do not trace when cpu is offline") added
a check to make sure that tracepoints only get called when the cpu is
online, as it uses rcu_read_lock_sched() for protection.

Commit 3a630178fd5f3 ("tracing: generate RCU warnings even when tracepoints
are disabled") added lockdep checks (including rcu checks) for events that
are not enabled to catch possible RCU issues that would only be triggered if
a trace event was enabled. Commit f37755490fe9b only stopped the warnings
when the trace event was enabled but did not prevent warnings if the trace
event was called when disabled.

To fix this, the cpu online check is moved to where the condition is added
to the trace event. This will place the cpu online check in all places that
it may be used now and in the future.

Cc: stable@vger.kernel.org # v3.18+
Fixes: f37755490fe9b ("tracepoints: Do not trace when cpu is offline")
Fixes: 3a630178fd5f3 ("tracing: generate RCU warnings even when tracepoints are disabled")
Reported-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index acfdbf353a0b..be586c632a0c 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -134,9 +134,6 @@ extern void syscall_unregfunc(void);
 		void *it_func;						\
 		void *__data;						\
 									\
-		if (!cpu_online(raw_smp_processor_id()))		\
-			return;						\
-									\
 		if (!(cond))						\
 			return;						\
 		prercu;							\
@@ -343,15 +340,19 @@ extern void syscall_unregfunc(void);
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)					\
-		__DECLARE_TRACE(name, void, , 1, void *__data, __data)
+	__DECLARE_TRACE(name, void, ,					\
+			cpu_online(raw_smp_processor_id()),		\
+			void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)				\
-		__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,	\
-				PARAMS(void *__data, proto),		\
-				PARAMS(__data, args))
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()),		\
+			PARAMS(void *__data, proto),			\
+			PARAMS(__data, args))
 
 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)		\
-	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+	__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
+			cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
 			PARAMS(void *__data, proto),			\
 			PARAMS(__data, args))
 
-- 
cgit v1.2.3


From d77a117e6871ff78a06def46583d23752593de60 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 9 Mar 2016 14:08:10 -0800
Subject: list: kill list_force_poison()

Given we have uninitialized list_heads being passed to list_add() it
will always be the case that those uninitialized values randomly trigger
the poison value.  Especially since a list_add() operation will seed the
stack with the poison value for later stack allocations to trip over.

For example, see these two false positive reports:

  list_add attempted on force-poisoned entry
  WARNING: at lib/list_debug.c:34
  [..]
  NIP [c00000000043c390] __list_add+0xb0/0x150
  LR [c00000000043c38c] __list_add+0xac/0x150
  Call Trace:
    __list_add+0xac/0x150 (unreliable)
    __down+0x4c/0xf8
    down+0x68/0x70
    xfs_buf_lock+0x4c/0x150 [xfs]

  list_add attempted on force-poisoned entry(0000000000000500),
   new->next == d0000000059ecdb0, new->prev == 0000000000000500
  WARNING: at lib/list_debug.c:33
  [..]
  NIP [c00000000042db78] __list_add+0xa8/0x140
  LR [c00000000042db74] __list_add+0xa4/0x140
  Call Trace:
    __list_add+0xa4/0x140 (unreliable)
    rwsem_down_read_failed+0x6c/0x1a0
    down_read+0x58/0x60
    xfs_log_commit_cil+0x7c/0x600 [xfs]

Fixes: commit 5c2c2587b132 ("mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Eryu Guan <eguan@redhat.com>
Tested-by: Eryu Guan <eguan@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 30cf4200ab40..5356f4d661a7 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -113,17 +113,6 @@ extern void __list_del_entry(struct list_head *entry);
 extern void list_del(struct list_head *entry);
 #endif
 
-#ifdef CONFIG_DEBUG_LIST
-/*
- * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one
- * of the pages it allocates is ever passed to list_add()
- */
-extern void list_force_poison(struct list_head *entry);
-#else
-/* fallback to the less strict LIST_POISON* definitions */
-#define list_force_poison list_del
-#endif
-
 /**
  * list_replace - replace old entry by new one
  * @old : the element to be replaced
-- 
cgit v1.2.3


From e3ae116339f9a0c77523abc95e338fa405946e07 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Mar 2016 14:08:15 -0800
Subject: kasan: add functions to clear stack poison

Functions which the compiler has instrumented for ASAN place poison on
the stack shadow upon entry and remove this poison prior to returning.

In some cases (e.g. hotplug and idle), CPUs may exit the kernel a
number of levels deep in C code.  If there are any instrumented
functions on this critical path, these will leave portions of the idle
thread stack shadow poisoned.

If a CPU returns to the kernel via a different path (e.g. a cold
entry), then depending on stack frame layout subsequent calls to
instrumented functions may use regions of the stack with stale poison,
resulting in (spurious) KASAN splats to the console.

Contemporary GCCs always add stack shadow poisoning when ASAN is
enabled, even when asked to not instrument a function [1], so we can't
simply annotate functions on the critical path to avoid poisoning.

Instead, this series explicitly removes any stale poison before it can
be hit.  In the common hotplug case we clear the entire stack shadow in
common code, before a CPU is brought online.

On architectures which perform a cold return as part of cpu idle may
retain an architecture-specific amount of stack contents.  To retain the
poison for this retained context, the arch code must call the core KASAN
code, passing a "watermark" stack pointer value beyond which shadow will
be cleared.  Architectures which don't perform a cold return as part of
idle do not need any additional code.

This patch (of 3):

Functions which the compiler has instrumented for KASAN place poison on
the stack shadow upon entry and remove this poision prior to returning.

In some cases (e.g.  hotplug and idle), CPUs may exit the kernel a number
of levels deep in C code.  If there are any instrumented functions on this
critical path, these will leave portions of the stack shadow poisoned.

If a CPU returns to the kernel via a different path (e.g.  a cold entry),
then depending on stack frame layout subsequent calls to instrumented
functions may use regions of the stack with stale poison, resulting in
(spurious) KASAN splats to the console.

To avoid this, we must clear stale poison from the stack prior to
instrumented functions being called.  This patch adds functions to the
KASAN core for removing poison from (portions of) a task's stack.  These
will be used by subsequent patches to avoid problems with hotplug and
idle.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4b9f85c963d0..0fdc798e3ff7 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -13,7 +14,6 @@ struct vm_struct;
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
-#include <linux/sched.h>
 
 extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
@@ -43,6 +43,8 @@ static inline void kasan_disable_current(void)
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
+void kasan_unpoison_task_stack(struct task_struct *task);
+
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
@@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm);
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
 
-- 
cgit v1.2.3


From d6b7eaeb03421139e32800324ef04ab50bba886d Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Mar 2016 14:08:38 -0800
Subject: dma-mapping: avoid oops when parameter cpu_addr is null

To keep consistent with kfree, which tolerate ptr is NULL.  We do this
because sometimes we may use goto statement, so that success and failure
case can share parts of the code.  But unfortunately, dma_free_coherent
called with parameter cpu_addr is null will cause oops, such as showed
below:

  Unable to handle kernel paging request at virtual address ffffffc020d3b2b8
  pgd = ffffffc083a61000
  [ffffffc020d3b2b8] *pgd=0000000000000000, *pud=0000000000000000
  CPU: 4 PID: 1489 Comm: malloc_dma_1 Tainted: G           O    4.1.12 #1
  Hardware name: ARM64 (DT)
  PC is at __dma_free_coherent.isra.10+0x74/0xc8
  LR is at __dma_free+0x9c/0xb0
  Process malloc_dma_1 (pid: 1489, stack limit = 0xffffffc0837fc020)
  [...]
  Call trace:
    __dma_free_coherent.isra.10+0x74/0xc8
    __dma_free+0x9c/0xb0
    malloc_dma+0x104/0x158 [dma_alloc_coherent_mtmalloc]
    kthread+0xec/0xfc

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 75857cda38e9..728ef074602a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -386,7 +386,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
 		return;
 
-	if (!ops->free)
+	if (!ops->free || !cpu_addr)
 		return;
 
 	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-- 
cgit v1.2.3


From b2cd27448b33de9069d580d8f229efef434b64e6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sat, 5 Mar 2016 07:13:39 -0300
Subject: [media] media-device: map new functions into old types for legacy API

The legacy media controller userspace API exposes entity types that
carry both type and function information. The new API replaces the type
with a function. It preserves backward compatibility by defining legacy
functions for the existing types and using them in drivers.

This works fine, as long as newer entity functions won't be added.

Unfortunately, some tools, like media-ctl with --print-dot argument
rely on the now legacy MEDIA_ENT_T_V4L2_SUBDEV and MEDIA_ENT_T_DEVNODE
numeric ranges to identify what entities will be shown.

Also, if the entity doesn't match those ranges, it will ignore the
major/minor information on devnodes, and won't be getting the devnode
name via udev or sysfs.

As we're now adding devices outside the old range, the legacy ioctl
needs to map the new entity functions into a type at the old range,
or otherwise we'll have a regression.

Detected on all released media-ctl versions (e. g. versions <= 1.10).

Fix this by deriving the type from the function to emulate the legacy
API if the function isn't in the legacy functions range.

Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 625b38f65764..a8e3a8c0d85a 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -120,7 +120,7 @@ struct media_device_info {
 
 #define MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN	MEDIA_ENT_F_OLD_SUBDEV_BASE
 
-#ifndef __KERNEL__
+#if !defined(__KERNEL__) || defined(__NEED_MEDIA_LEGACY_API)
 
 /*
  * Legacy symbols used to avoid userspace compilation breakages
@@ -133,6 +133,10 @@ struct media_device_info {
 #define MEDIA_ENT_TYPE_MASK		0x00ff0000
 #define MEDIA_ENT_SUBTYPE_MASK		0x0000ffff
 
+/* End of the old subdev reserved numberspace */
+#define MEDIA_ENT_T_DEVNODE_UNKNOWN	(MEDIA_ENT_T_DEVNODE | \
+					 MEDIA_ENT_SUBTYPE_MASK)
+
 #define MEDIA_ENT_T_DEVNODE		MEDIA_ENT_F_OLD_BASE
 #define MEDIA_ENT_T_DEVNODE_V4L		MEDIA_ENT_F_IO_V4L
 #define MEDIA_ENT_T_DEVNODE_FB		(MEDIA_ENT_T_DEVNODE + 2)
-- 
cgit v1.2.3


From 90d0f0f11588ec692c12f9009089b398be395184 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Sat, 12 Mar 2016 22:56:19 +0800
Subject: block: don't optimize for non-cloned bio in bio_get_last_bvec()

For !BIO_CLONED bio, we can use .bi_vcnt safely, but it
doesn't mean we can just simply return .bi_io_vec[.bi_vcnt - 1]
because the start postion may have been moved in the middle of
the bvec, such as splitting in the middle of bvec.

Fixes: 7bcd79ac50d9(block: bio: introduce helpers to get the 1st and last bvec)
Cc: stable@vger.kernel.org
Reported-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index cb6888824108..88bc64f00bb5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -320,11 +320,6 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
 	struct bvec_iter iter = bio->bi_iter;
 	int idx;
 
-	if (!bio_flagged(bio, BIO_CLONED)) {
-		*bv = bio->bi_io_vec[bio->bi_vcnt - 1];
-		return;
-	}
-
 	if (unlikely(!bio_multiple_segments(bio))) {
 		*bv = bio_iovec(bio);
 		return;
-- 
cgit v1.2.3