From 72c5296f9d64d8f5f27c2133e5f108a45a353d71 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Tue, 20 Dec 2016 14:38:14 -0700
Subject: genhd: remove dead and duplicated scsi code

blk_scsi_cmd_filter use was deprecated by 4beab5c6 and the SCSI macros
are duplicated in blkdev.h, both likely reintroduced by a bad merge from
540eed56.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/genhd.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e0341af6950e..76f39754e7b0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -146,15 +146,6 @@ enum {
 	DISK_EVENT_EJECT_REQUEST		= 1 << 1, /* eject requested */
 };
 
-#define BLK_SCSI_MAX_CMDS	(256)
-#define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-
-struct blk_scsi_cmd_filter {
-	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
-	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
-	struct kobject kobj;
-};
-
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
-- 
cgit v1.2.3


From c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 10 Aug 2016 17:22:44 +0200
Subject: mm: Invalidate DAX radix tree entries only if appropriate

Currently invalidate_inode_pages2_range() and invalidate_mapping_pages()
just delete all exceptional radix tree entries they find. For DAX this
is not desirable as we track cache dirtiness in these entries and when
they are evicted, we may not flush caches although it is necessary. This
can for example manifest when we write to the same block both via mmap
and via write(2) (to different offsets) and fsync(2) then does not
properly flush CPU caches when modification via write(2) was the last
one.

Create appropriate DAX functions to handle invalidation of DAX entries
for invalidate_inode_pages2_range() and invalidate_mapping_pages() and
wire them up into the corresponding mm functions.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index f97bcfe79472..24ad71173995 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+				      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all);
 
-- 
cgit v1.2.3


From 1efbd205b3cc5882a8c386c58a57134044e9d5ba Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 28 Dec 2016 14:58:39 +0200
Subject: Revert "net/mlx5: Add MPCNT register infrastructure"

This reverts commit 7f503169cabd70c1f13b9279c50eca7dfb9a7d51.

Fixes: 7f503169cabd ("net/mlx5: Add MPCNT register infrastructure")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/device.h   |  5 ---
 include/linux/mlx5/driver.h   |  1 -
 include/linux/mlx5/mlx5_ifc.h | 93 -------------------------------------------
 3 files changed, 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 9f489365b3d3..52b437431c6a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,11 +1071,6 @@ enum {
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
-enum {
-	MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
-	MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
-};
-
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
 	if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ae55361e674..735b36335f29 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -123,7 +123,6 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
-	MLX5_REG_MPCNT		 = 0x9051,
 };
 
 enum mlx5_dcbx_oper_mode {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 57bec544e20a..a852e9db6f0d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1757,80 +1757,6 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
 	u8         reserved_at_4c0[0x300];
 };
 
-struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         rx_errors[0x20];
-
-	u8         tx_errors[0x20];
-
-	u8         l0_to_recovery_eieos[0x20];
-
-	u8         l0_to_recovery_ts[0x20];
-
-	u8         l0_to_recovery_framing[0x20];
-
-	u8         l0_to_recovery_retrain[0x20];
-
-	u8         crc_error_dllp[0x20];
-
-	u8         crc_error_tlp[0x20];
-
-	u8         reserved_at_140[0x680];
-};
-
-struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits {
-	u8         life_time_counter_high[0x20];
-
-	u8         life_time_counter_low[0x20];
-
-	u8         time_to_boot_image_start[0x20];
-
-	u8         time_to_link_image[0x20];
-
-	u8         calibration_time[0x20];
-
-	u8         time_to_first_perst[0x20];
-
-	u8         time_to_detect_state[0x20];
-
-	u8         time_to_l0[0x20];
-
-	u8         time_to_crs_en[0x20];
-
-	u8         time_to_plastic_image_start[0x20];
-
-	u8         time_to_iron_image_start[0x20];
-
-	u8         perst_handler[0x20];
-
-	u8         times_in_l1[0x20];
-
-	u8         times_in_l23[0x20];
-
-	u8         dl_down[0x20];
-
-	u8         config_cycle1usec[0x20];
-
-	u8         config_cycle2to7usec[0x20];
-
-	u8         config_cycle_8to15usec[0x20];
-
-	u8         config_cycle_16_to_63usec[0x20];
-
-	u8         config_cycle_64usec[0x20];
-
-	u8         correctable_err_msg_sent[0x20];
-
-	u8         non_fatal_err_msg_sent[0x20];
-
-	u8         fatal_err_msg_sent[0x20];
-
-	u8         reserved_at_2e0[0x4e0];
-};
-
 struct mlx5_ifc_cmd_inter_comp_event_bits {
 	u8         command_completion_vector[0x20];
 
@@ -2995,12 +2921,6 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	u8         reserved_at_0[0x7c0];
 };
 
-union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
-	struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
-	struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout;
-	u8         reserved_at_0[0x7c0];
-};
-
 union mlx5_ifc_event_auto_bits {
 	struct mlx5_ifc_comp_event_bits comp_event;
 	struct mlx5_ifc_dct_events_bits dct_events;
@@ -7320,18 +7240,6 @@ struct mlx5_ifc_ppcnt_reg_bits {
 	union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
-struct mlx5_ifc_mpcnt_reg_bits {
-	u8         reserved_at_0[0x8];
-	u8         pcie_index[0x8];
-	u8         reserved_at_10[0xa];
-	u8         grp[0x6];
-
-	u8         clr[0x1];
-	u8         reserved_at_21[0x1f];
-
-	union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
-};
-
 struct mlx5_ifc_ppad_reg_bits {
 	u8         reserved_at_0[0x3];
 	u8         single_mac[0x1];
@@ -7937,7 +7845,6 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
 	struct mlx5_ifc_ppad_reg_bits ppad_reg;
 	struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
-	struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
 	struct mlx5_ifc_pplm_reg_bits pplm_reg;
 	struct mlx5_ifc_pplr_reg_bits pplr_reg;
 	struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
-- 
cgit v1.2.3


From b91e1302ad9b80c174a4855533f7e3aa2873355e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 27 Dec 2016 11:40:38 -0800
Subject: mm: optimize PageWaiters bit use for unlock_page()

In commit 62906027091f ("mm: add PageWaiters indicating tasks are
waiting for a page bit") Nick Piggin made our page locking no longer
unconditionally touch the hashed page waitqueue, which not only helps
performance in general, but is particularly helpful on NUMA machines
where the hashed wait queues can bounce around a lot.

However, the "clear lock bit atomically and then test the waiters bit"
sequence turns out to be much more expensive than it needs to be,
because you get a nasty stall when trying to access the same word that
just got updated atomically.

On architectures where locking is done with LL/SC, this would be trivial
to fix with a new primitive that clears one bit and tests another
atomically, but that ends up not working on x86, where the only atomic
operations that return the result end up being cmpxchg and xadd.  The
atomic bit operations return the old value of the same bit we changed,
not the value of an unrelated bit.

On x86, we could put the lock bit in the high bit of the byte, and use
"xadd" with that bit (where the overflow ends up not touching other
bits), and look at the other bits of the result.  However, an even
simpler model is to just use a regular atomic "and" to clear the lock
bit, and then the sign bit in eflags will indicate the resulting state
of the unrelated bit #7.

So by moving the PageWaiters bit up to bit #7, we can atomically clear
the lock bit and test the waiters bit on x86 too.  And architectures
with LL/SC (which is all the usual RISC suspects), the particular bit
doesn't matter, so they are fine with this approach too.

This avoids the extra access to the same atomic word, and thus avoids
the costly stall at page unlock time.

The only downside is that the interface ends up being a bit odd and
specialized: clear a bit in a byte, and test the sign bit.  Nick doesn't
love the resulting name of the new primitive, but I'd rather make the
name be descriptive and very clear about the limitation imposed by
trying to work across all relevant architectures than make it be some
generic thing that doesn't make the odd semantics explicit.

So this introduces the new architecture primitive

    clear_bit_unlock_is_negative_byte();

and adds the trivial implementation for x86.  We have a generic
non-optimized fallback (that just does a "clear_bit()"+"test_bit(7)"
combination) which can be overridden by any architecture that can do
better.  According to Nick, Power has the same hickup x86 has, for
example, but some other architectures may not even care.

All these optimizations mean that my page locking stress-test (which is
just executing a lot of small short-lived shell scripts: "make test" in
the git source tree) no longer makes our page locking look horribly bad.
Before all these optimizations, just the unlock_page() costs were just
over 3% of all CPU overhead on "make test".  After this, it's down to
0.66%, so just a quarter of the cost it used to be.

(The difference on NUMA is bigger, but there this micro-optimization is
likely less noticeable, since the big issue on NUMA was not the accesses
to 'struct page', but the waitqueue accesses that were already removed
by Nick's earlier commit).

Acked-by: Nick Piggin <npiggin@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Bob Peterson <rpeterso@redhat.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Lutomirski <luto@kernel.org>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c56b39890a41..6b5818d6de32 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,13 +73,13 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
-	PG_waiters,		/* Page has waiters, check its waitqueue */
 	PG_error,
 	PG_referenced,
 	PG_uptodate,
 	PG_dirty,
 	PG_lru,
 	PG_active,
+	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
 	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
-- 
cgit v1.2.3


From 10b1c04e92229ebeb38ccd0dcf2b6d3ec73c0575 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 Dec 2016 18:37:13 +0200
Subject: net/mlx4_core: Fix raw qp flow steering rules under SRIOV

Demoting simple flow steering rule priority (for DPDK) was achieved by
wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF
as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper
function for the PF and all VFs.

In function mlx4_ib_create_flow(), this change caused the main rule
creation for the PF to be wrapped, while it left the associated
tunnel steering rule creation unwrapped for the PF.

This mismatch caused rule deletion failures in mlx4_ib_destroy_flow()
for the PF when the detach wrapper function did not find the associated
tunnel-steering rule (since creation of that rule for the PF did not
go through the wrapper function).

Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native"
(so that the PF invocation does not go through the wrapper), and perform
the required priority demotion for the PF in the mlx4_ib_create_flow()
code path.

Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx4/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 93bdb3485192..6533c16e27ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1384,6 +1384,8 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
 int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
 int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
 				      bool *vlan_offload_disabled);
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+				       struct _rule_hw *eth_header);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3