From 61df56bef97e1708bfbc006b307b00834ad61fe8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 10 May 2017 17:12:52 +0200 Subject: HID: Add mapping for Microsoft Win8 Wireless Radio Controls extensions Microsoft has defined some extra HUT codes for the Generic Desktop Page for Wireless Radio controls, see: https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management I've 3 2-in-1 keyboard docks: Dell Venue Pro 11 keyboard dock, HP pavilion x2 keyboard dock and a PEAQ C1010 keyboard dock which have a wireless radio toggle hotkey, which uses the 0x000100c6 HUT code defined in these extensions. This commit adds a mapping for this key, this makes the rfkill toggle hotkey work on the Dell Venue Pro 11 and HP Pavilion X2 keyboards, the PEAQ C1010 keyboard does generate events for the 0x000100c6 HUT code when pressed, but the reported value is always 0. Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5be325d890d9..0b29466bbc21 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -182,6 +182,12 @@ struct hid_item { #define HID_GD_KEYBOARD 0x00010006 #define HID_GD_KEYPAD 0x00010007 #define HID_GD_MULTIAXIS 0x00010008 +/* + * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017): + * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + */ +#define HID_GD_WIRELESS_RADIO_CTLS 0x0001000c #define HID_GD_X 0x00010030 #define HID_GD_Y 0x00010031 #define HID_GD_Z 0x00010032 @@ -210,6 +216,10 @@ struct hid_item { #define HID_GD_DOWN 0x00010091 #define HID_GD_RIGHT 0x00010092 #define HID_GD_LEFT 0x00010093 +/* Microsoft Win8 Wireless Radio Controls CA usage codes */ +#define HID_GD_RFKILL_BTN 0x000100c6 +#define HID_GD_RFKILL_LED 0x000100c7 +#define HID_GD_RFKILL_SWITCH 0x000100c8 #define HID_DC_BATTERYSTRENGTH 0x00060020 -- cgit v1.2.3 From 785818fa8385fe55dab253e42a4c6728fca61333 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:43 +0200 Subject: mtd: nand: add core support for on-die ECC A number of NAND flashes have a capability called "on-die ECC" where the NAND chip itself is capable of detecting and correcting errors. Linux already has support for using the ECC implementation of the NAND controller, or a software based ECC implementation, but not for using the ECC implementation of the NAND controller. However, such an implementation is sometimes useful in situations where the NAND controller provides ECC algorithms that are not strong enough for the NAND chip used on the system. A typical case is a NAND chip that requires a 4-bit ECC, while the NAND controller only provides a 1-bit ECC algorithm. This commit introduces the support for the NAND_ECC_ON_DIE ECC mode: - Parsing of the "on-die" value for the "nand-ecc-mode" Device Tree property - Handling NAND_ECC_ON_DIE case in nand_scan_tail(). The idea is that the vendor specific code for the NAND chip must implement ->read_page() and ->write_page(). It may optionally provide its own ->read_page_raw() and ->write_page_raw() as well. For OOB operation, we assume the standard operations are good enough, but they can be overridden by the vendor specific code if needed. Signed-off-by: Thomas Petazzoni Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8f67b1581683..603522097ec9 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -116,6 +116,7 @@ typedef enum { NAND_ECC_HW, NAND_ECC_HW_SYNDROME, NAND_ECC_HW_OOB_FIRST, + NAND_ECC_ON_DIE, } nand_ecc_modes_t; enum nand_ecc_algo { -- cgit v1.2.3 From cc0f51ec111266f5d255e753bf3254ad411d5c12 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:44 +0200 Subject: mtd: nand: export nand_{read,write}_page_raw() The nand_read_page_raw() and nand_write_page_raw() functions might be re-used by vendor-specific implementations of the read_page/write_page functions. Instead of having vendor-specific code duplicate this code, it is much better to export those functions and allow them to be re-used. Signed-off-by: Thomas Petazzoni Reviewed-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 603522097ec9..7a01d2eb7443 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1259,6 +1259,14 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); +/* Default read_page_raw implementation */ +int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page); + +/* Default write_page_raw implementation */ +int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required, int page); + /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); -- cgit v1.2.3 From c512f36b581862df20acef050c3e1a875166bd6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 09:31:19 +0200 Subject: sunrpc: properly type argument to kxdreproc_t Pass struct rpc_request as the first argument instead of an untyped blob, and mark the data object as const. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 054c8cde18f3..290f189de200 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -17,6 +17,8 @@ #include #include +struct rpc_rqst; + /* * Buffer adjustment */ @@ -222,7 +224,8 @@ struct xdr_stream { /* * These are the xdr_stream style generic XDR encode and decode functions. */ -typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj); typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); -- cgit v1.2.3 From 73c8dc133afb0cbe72a9234894ea72c2a0e71a73 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 14:58:11 +0200 Subject: sunrpc: properly type argument to kxdrdproc_t Pass struct rpc_request as the first argument instead of an untyped blob. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Acked-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 290f189de200..ed0fbf0d8d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -226,7 +226,8 @@ struct xdr_stream { */ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, const void *obj); -typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -- cgit v1.2.3 From 1c5876ddbdb401f814ef717394826e7dfb6704d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:27:10 +0200 Subject: sunrpc: move p_count out of struct rpc_procinfo p_count is the only writeable memeber of struct rpc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it into out out struct rpc_procinfo, and into a separate writable array that is pointed to by struct rpc_version and indexed by p_statidx. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6095ecba0dde..c75ba37151fe 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -88,6 +88,7 @@ struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ struct rpc_procinfo * procs; /* procedure array */ + unsigned int *counts; /* call counts */ }; /* @@ -99,7 +100,6 @@ struct rpc_procinfo { kxdrdproc_t p_decode; /* XDR decode function */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ - unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ const char * p_name; /* name of procedure */ -- cgit v1.2.3 From 499b4988109e91b76f231fb1b4f1e53ec3260686 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 15:36:49 +0200 Subject: sunrpc: mark all struct rpc_procinfo instances as const struct rpc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attach vector for code injections. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- include/linux/sunrpc/sched.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c75ba37151fe..55ef67bea06b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -39,7 +39,7 @@ struct rpc_clnt { struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ - struct rpc_procinfo * cl_procinfo; /* procedure info */ + const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ @@ -87,7 +87,7 @@ struct rpc_program { struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ - struct rpc_procinfo * procs; /* procedure array */ + const struct rpc_procinfo *procs; /* procedure array */ unsigned int *counts; /* call counts */ }; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..ed60253abd0a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -22,7 +22,7 @@ */ struct rpc_procinfo; struct rpc_message { - struct rpc_procinfo * rpc_proc; /* Procedure information */ + const struct rpc_procinfo *rpc_proc; /* Procedure information */ void * rpc_argp; /* Arguments */ void * rpc_resp; /* Result */ struct rpc_cred * rpc_cred; /* Credentials */ -- cgit v1.2.3 From a6beb73272b4c0108e41bc7c7b5a447ae6c92863 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 17:35:49 +0200 Subject: sunrpc: properly type pc_func callbacks Drop the argp and resp arguments as they can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to svc_procfunc as well as the svc_procfunc typedef itself. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 94631026f79c..5c222af2db41 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -418,9 +418,9 @@ struct svc_version { /* * RPC procedure info */ -typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { - svc_procfunc pc_func; /* process the request */ + /* process the request: */ + __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ kxdrproc_t pc_release; /* XDR free result */ -- cgit v1.2.3 From 8537488b5a2f33980e33f654b0a515304de2b267 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 18:48:24 +0200 Subject: sunrpc: properly type pc_release callbacks Drop the p and resp arguments as they are always NULL or can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5c222af2db41..1381e1343640 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -423,7 +423,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ - kxdrproc_t pc_release; /* XDR free result */ + /* XDR free result: */ + void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_count; /* call count */ -- cgit v1.2.3 From 026fec7e7c4723b5f26a753bbcad69f68c8299d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:01:48 +0200 Subject: sunrpc: properly type pc_decode callbacks Drop the argp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig --- include/linux/lockd/xdr.h | 18 +++++++++--------- include/linux/lockd/xdr4.h | 18 +++++++++--------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d39ed1cc5fbf..0416600844ce 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -95,19 +95,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *); int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index e58c88b52ce1..951bbe31fdb8 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -23,19 +23,19 @@ -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *); int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1381e1343640..047f04411dd4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -421,7 +421,8 @@ struct svc_version { struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); - kxdrproc_t pc_decode; /* XDR decode args */ + /* XDR decode args: */ + int (*pc_decode)(struct svc_rqst *, __be32 *data); kxdrproc_t pc_encode; /* XDR encode result */ /* XDR free result: */ void (*pc_release)(struct svc_rqst *); -- cgit v1.2.3 From 63f8de37951a64cc24479eafd33085537e088075 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:42:02 +0200 Subject: sunrpc: properly type pc_encode callbacks Drop the resp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/lockd/xdr.h | 8 ++++---- include/linux/lockd/xdr4.h | 8 ++++---- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 0416600844ce..7acbecc21a40 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,16 +96,16 @@ struct nlm_reboot { #define NLMSVC_XDRSIZE sizeof(struct nlm_args) int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_res(struct svc_rqst *, __be32 *); int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 951bbe31fdb8..bf1645609225 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -24,16 +24,16 @@ int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *); int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 047f04411dd4..6cfe41db7f31 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -423,7 +423,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ int (*pc_decode)(struct svc_rqst *, __be32 *data); - kxdrproc_t pc_encode; /* XDR encode result */ + /* XDR encode result: */ + int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ -- cgit v1.2.3 From 35f297e5370bd511a171f7de0c5a31ee661f2e7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:56:10 +0200 Subject: sunrpc: remove kxdrproc_t Remove the now unused typedef. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/xdr.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ed0fbf0d8d0f..261b48a2701d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -34,13 +34,6 @@ struct xdr_netobj { u8 * data; }; -/* - * This is the legacy generic XDR function. rqstp is either a rpc_rqst - * (client side) or svc_rqst pointer (server side). - * Encode functions always assume there's enough room in the buffer. - */ -typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); - /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 7fd38af9cae6aef1dfd28a7d1bd214eb5ddb7d53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:40:27 +0200 Subject: sunrpc: move pc_count out of struct svc_procinfo pc_count is the only writeable memeber of struct svc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it into out out struct svc_procinfo, and into a separate writable array that is pointed to by struct svc_version. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6cfe41db7f31..9f00384153f4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -397,6 +397,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ struct svc_procedure * vs_proc; /* per-procedure info */ + unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ @@ -429,7 +430,6 @@ struct svc_procedure { void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ - unsigned int pc_count; /* call count */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; -- cgit v1.2.3 From 860bda29b99afdc072a7a796fe81185f7ae85deb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:11:49 +0200 Subject: sunrpc: mark all struct svc_procinfo instances as const struct svc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attach vector for code injections. Signed-off-by: Christoph Hellwig --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 41f7b6a04d69..3eca67728366 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -192,9 +192,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 -extern struct svc_procedure nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9f00384153f4..984e6b9c3043 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -237,7 +237,7 @@ struct svc_rqst { struct svc_serv * rq_server; /* RPC service definition */ struct svc_pool * rq_pool; /* thread pool */ - struct svc_procedure * rq_procinfo; /* procedure info */ + const struct svc_procedure *rq_procinfo;/* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ @@ -396,7 +396,7 @@ struct svc_program { struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ - struct svc_procedure * vs_proc; /* per-procedure info */ + const struct svc_procedure *vs_proc; /* per-procedure info */ unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ -- cgit v1.2.3 From e9679189e34b25a1b9aa77fe37d331559d1544af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:21:37 +0200 Subject: sunrpc: mark all struct svc_version instances as const Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 984e6b9c3043..e85267899753 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -383,7 +383,7 @@ struct svc_program { unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ unsigned int pg_nvers; /* number of versions */ - struct svc_version ** pg_vers; /* version array */ + const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ -- cgit v1.2.3 From cfc5604c488ccd17936b69008af0c9ae050f4a08 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:46 +0200 Subject: mtd: spi-nor: introduce SPI 1-2-2 and SPI 1-4-4 protocols This patch changes the prototype of spi_nor_scan(): its 3rd parameter is replaced by a 'struct spi_nor_hwcaps' pointer, which tells the spi-nor framework about the actual hardware capabilities supported by the SPI controller and its driver. Besides, this patch also introduces a new 'struct spi_nor_flash_parameter' telling the spi-nor framework about the hardware capabilities supported by the SPI flash memory and the associated settings required to use those hardware caps. Then, to improve the readability of spi_nor_scan(), the discovery of the memory settings and the memory initialization are now split into two dedicated functions. 1 - spi_nor_init_params() The spi_nor_init_params() function is responsible for initializing the 'struct spi_nor_flash_parameter'. Currently this structure is filled with legacy values but further patches will allow to override some parameter values dynamically, for instance by reading the JESD216 Serial Flash Discoverable Parameter (SFDP) tables from the SPI memory. The spi_nor_init_params() function only deals with the hardware capabilities of the SPI flash memory: especially it doesn't care about the hardware capabilities supported by the SPI controller. 2 - spi_nor_setup() The second function is called once the 'struct spi_nor_flash_parameter' has been initialized by spi_nor_init_params(). With both 'struct spi_nor_flash_parameter' and 'struct spi_nor_hwcaps', the new argument of spi_nor_scan(), spi_nor_setup() computes the best match between hardware caps supported by both the (Q)SPI memory and controller hence selecting the relevant settings for (Fast) Read and Page Program operations. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 119 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 110 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index f2a718030476..60db1585f94c 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -119,13 +119,63 @@ /* Configuration Register bits. */ #define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ -enum read_mode { - SPI_NOR_NORMAL = 0, - SPI_NOR_FAST, - SPI_NOR_DUAL, - SPI_NOR_QUAD, +/* Supported SPI protocols */ +#define SNOR_PROTO_INST_MASK GENMASK(23, 16) +#define SNOR_PROTO_INST_SHIFT 16 +#define SNOR_PROTO_INST(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_INST_SHIFT) & \ + SNOR_PROTO_INST_MASK) + +#define SNOR_PROTO_ADDR_MASK GENMASK(15, 8) +#define SNOR_PROTO_ADDR_SHIFT 8 +#define SNOR_PROTO_ADDR(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_ADDR_SHIFT) & \ + SNOR_PROTO_ADDR_MASK) + +#define SNOR_PROTO_DATA_MASK GENMASK(7, 0) +#define SNOR_PROTO_DATA_SHIFT 0 +#define SNOR_PROTO_DATA(_nbits) \ + ((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \ + SNOR_PROTO_DATA_MASK) + +#define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits) \ + (SNOR_PROTO_INST(_inst_nbits) | \ + SNOR_PROTO_ADDR(_addr_nbits) | \ + SNOR_PROTO_DATA(_data_nbits)) + +enum spi_nor_protocol { + SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), + SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2), + SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4), + SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2), + SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), + SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), + SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), }; +static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >> + SNOR_PROTO_INST_SHIFT; +} + +static inline u8 spi_nor_get_protocol_addr_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_ADDR_MASK)) >> + SNOR_PROTO_ADDR_SHIFT; +} + +static inline u8 spi_nor_get_protocol_data_nbits(enum spi_nor_protocol proto) +{ + return ((unsigned long)(proto & SNOR_PROTO_DATA_MASK)) >> + SNOR_PROTO_DATA_SHIFT; +} + +static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto) +{ + return spi_nor_get_protocol_data_nbits(proto); +} + #define SPI_NOR_MAX_CMD_SIZE 8 enum spi_nor_ops { SPI_NOR_OPS_READ = 0, @@ -154,9 +204,11 @@ enum spi_nor_option_flags { * @read_opcode: the read opcode * @read_dummy: the dummy needed by the read operation * @program_opcode: the program opcode - * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI-NOR (SNOR_F_*) + * @read_proto: the SPI protocol for read operations + * @write_proto: the SPI protocol for write operations + * @reg_proto the SPI protocol for read_reg/write_reg/erase operations * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the * read/write/erase/lock/unlock operations @@ -185,7 +237,9 @@ struct spi_nor { u8 read_opcode; u8 read_dummy; u8 program_opcode; - enum read_mode flash_read; + enum spi_nor_protocol read_proto; + enum spi_nor_protocol write_proto; + enum spi_nor_protocol reg_proto; bool sst_write_second; u32 flags; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; @@ -219,11 +273,57 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) return mtd_get_of_node(&nor->mtd); } +/** + * struct spi_nor_hwcaps - Structure for describing the hardware capabilies + * supported by the SPI controller (bus master). + * @mask: the bitmask listing all the supported hw capabilies + */ +struct spi_nor_hwcaps { + u32 mask; +}; + +/* + *(Fast) Read capabilities. + * MUST be ordered by priority: the higher bit position, the higher priority. + * As a matter of performances, it is relevant to use Quad SPI protocols first, + * then Dual SPI protocols before Fast Read and lastly (Slow) Read. + */ +#define SNOR_HWCAPS_READ_MASK GENMASK(7, 0) +#define SNOR_HWCAPS_READ BIT(0) +#define SNOR_HWCAPS_READ_FAST BIT(1) + +#define SNOR_HWCAPS_READ_DUAL GENMASK(4, 2) +#define SNOR_HWCAPS_READ_1_1_2 BIT(2) +#define SNOR_HWCAPS_READ_1_2_2 BIT(3) +#define SNOR_HWCAPS_READ_2_2_2 BIT(4) + +#define SNOR_HWCAPS_READ_QUAD GENMASK(7, 5) +#define SNOR_HWCAPS_READ_1_1_4 BIT(5) +#define SNOR_HWCAPS_READ_1_4_4 BIT(6) +#define SNOR_HWCAPS_READ_4_4_4 BIT(7) + +/* + * Page Program capabilities. + * MUST be ordered by priority: the higher bit position, the higher priority. + * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the + * legacy SPI 1-1-1 protocol. + * Note that Dual Page Programs are not supported because there is no existing + * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory + * implements such commands. + */ +#define SNOR_HWCAPS_PP_MASK GENMASK(19, 16) +#define SNOR_HWCAPS_PP BIT(16) + +#define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) +#define SNOR_HWCAPS_PP_1_1_4 BIT(17) +#define SNOR_HWCAPS_PP_1_4_4 BIT(18) +#define SNOR_HWCAPS_PP_4_4_4 BIT(19) + /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure * @name: the chip type name - * @mode: the read mode supported by the driver + * @hwcaps: the hardware capabilities supported by the controller driver * * The drivers can use this fuction to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to @@ -233,6 +333,7 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor) * * Return: 0 for success, others for failure. */ -int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); +int spi_nor_scan(struct spi_nor *nor, const char *name, + const struct spi_nor_hwcaps *hwcaps); #endif -- cgit v1.2.3 From 15f55331527b1422eae683477f8a31fdfae93316 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:48 +0200 Subject: mtd: spi-nor: introduce Double Transfer Rate (DTR) SPI protocols This patch introduces support to Double Transfer Rate (DTR) SPI protocols. DTR is used only for Fast Read operations. According to manufacturer datasheets, whatever the number of I/O lines used during instruction (x) and address/mode/dummy (y) clock cycles, DTR is used only during data (z) clock cycles of SPI x-y-z protocols. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 48 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 60db1585f94c..313dbe56f31a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -73,6 +73,15 @@ #define SPINOR_OP_BE_32K_4B 0x5c /* Erase 32KiB block */ #define SPINOR_OP_SE_4B 0xdc /* Sector erase (usually 64KiB) */ +/* Double Transfer Rate opcodes - defined in JEDEC JESD216B. */ +#define SPINOR_OP_READ_1_1_1_DTR 0x0d +#define SPINOR_OP_READ_1_2_2_DTR 0xbd +#define SPINOR_OP_READ_1_4_4_DTR 0xed + +#define SPINOR_OP_READ_1_1_1_DTR_4B 0x0e +#define SPINOR_OP_READ_1_2_2_DTR_4B 0xbe +#define SPINOR_OP_READ_1_4_4_DTR_4B 0xee + /* Used for SST flashes only. */ #define SPINOR_OP_BP 0x02 /* Byte program */ #define SPINOR_OP_WRDI 0x04 /* Write disable */ @@ -138,10 +147,15 @@ ((((unsigned long)(_nbits)) << SNOR_PROTO_DATA_SHIFT) & \ SNOR_PROTO_DATA_MASK) +#define SNOR_PROTO_IS_DTR BIT(24) /* Double Transfer Rate */ + #define SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits) \ (SNOR_PROTO_INST(_inst_nbits) | \ SNOR_PROTO_ADDR(_addr_nbits) | \ SNOR_PROTO_DATA(_data_nbits)) +#define SNOR_PROTO_DTR(_inst_nbits, _addr_nbits, _data_nbits) \ + (SNOR_PROTO_IS_DTR | \ + SNOR_PROTO_STR(_inst_nbits, _addr_nbits, _data_nbits)) enum spi_nor_protocol { SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), @@ -151,8 +165,17 @@ enum spi_nor_protocol { SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), + + SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1), + SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), + SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), }; +static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) +{ + return !!(proto & SNOR_PROTO_IS_DTR); +} + static inline u8 spi_nor_get_protocol_inst_nbits(enum spi_nor_protocol proto) { return ((unsigned long)(proto & SNOR_PROTO_INST_MASK)) >> @@ -288,19 +311,22 @@ struct spi_nor_hwcaps { * As a matter of performances, it is relevant to use Quad SPI protocols first, * then Dual SPI protocols before Fast Read and lastly (Slow) Read. */ -#define SNOR_HWCAPS_READ_MASK GENMASK(7, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(10, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) - -#define SNOR_HWCAPS_READ_DUAL GENMASK(4, 2) -#define SNOR_HWCAPS_READ_1_1_2 BIT(2) -#define SNOR_HWCAPS_READ_1_2_2 BIT(3) -#define SNOR_HWCAPS_READ_2_2_2 BIT(4) - -#define SNOR_HWCAPS_READ_QUAD GENMASK(7, 5) -#define SNOR_HWCAPS_READ_1_1_4 BIT(5) -#define SNOR_HWCAPS_READ_1_4_4 BIT(6) -#define SNOR_HWCAPS_READ_4_4_4 BIT(7) +#define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) + +#define SNOR_HWCAPS_READ_DUAL GENMASK(6, 3) +#define SNOR_HWCAPS_READ_1_1_2 BIT(3) +#define SNOR_HWCAPS_READ_1_2_2 BIT(4) +#define SNOR_HWCAPS_READ_2_2_2 BIT(5) +#define SNOR_HWCAPS_READ_1_2_2_DTR BIT(6) + +#define SNOR_HWCAPS_READ_QUAD GENMASK(10, 7) +#define SNOR_HWCAPS_READ_1_1_4 BIT(7) +#define SNOR_HWCAPS_READ_1_4_4 BIT(8) +#define SNOR_HWCAPS_READ_4_4_4 BIT(9) +#define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) /* * Page Program capabilities. -- cgit v1.2.3 From fe488a5e48c69204c3b1ad6fa3282e12dbfaabe7 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 25 Apr 2017 22:08:49 +0200 Subject: mtd: spi-nor: introduce Octo SPI protocols This patch starts adding support to Octo SPI protocols (SPI x-y-8). Op codes for Fast Read and/or Page Program operations using Octo SPI protocols are not known yet (no JEDEC specification has defined them yet) but we'd rather introduce the Octo SPI protocols now so it's done as it should be. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut --- include/linux/mtd/spi-nor.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 313dbe56f31a..55faa2f07cca 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -161,14 +161,18 @@ enum spi_nor_protocol { SNOR_PROTO_1_1_1 = SNOR_PROTO_STR(1, 1, 1), SNOR_PROTO_1_1_2 = SNOR_PROTO_STR(1, 1, 2), SNOR_PROTO_1_1_4 = SNOR_PROTO_STR(1, 1, 4), + SNOR_PROTO_1_1_8 = SNOR_PROTO_STR(1, 1, 8), SNOR_PROTO_1_2_2 = SNOR_PROTO_STR(1, 2, 2), SNOR_PROTO_1_4_4 = SNOR_PROTO_STR(1, 4, 4), + SNOR_PROTO_1_8_8 = SNOR_PROTO_STR(1, 8, 8), SNOR_PROTO_2_2_2 = SNOR_PROTO_STR(2, 2, 2), SNOR_PROTO_4_4_4 = SNOR_PROTO_STR(4, 4, 4), + SNOR_PROTO_8_8_8 = SNOR_PROTO_STR(8, 8, 8), SNOR_PROTO_1_1_1_DTR = SNOR_PROTO_DTR(1, 1, 1), SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2), SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4), + SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8), }; static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto) @@ -308,10 +312,11 @@ struct spi_nor_hwcaps { /* *(Fast) Read capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * As a matter of performances, it is relevant to use Quad SPI protocols first, - * then Dual SPI protocols before Fast Read and lastly (Slow) Read. + * As a matter of performances, it is relevant to use Octo SPI protocols first, + * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly + * (Slow) Read. */ -#define SNOR_HWCAPS_READ_MASK GENMASK(10, 0) +#define SNOR_HWCAPS_READ_MASK GENMASK(14, 0) #define SNOR_HWCAPS_READ BIT(0) #define SNOR_HWCAPS_READ_FAST BIT(1) #define SNOR_HWCAPS_READ_1_1_1_DTR BIT(2) @@ -328,16 +333,22 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_READ_4_4_4 BIT(9) #define SNOR_HWCAPS_READ_1_4_4_DTR BIT(10) +#define SNOR_HWCPAS_READ_OCTO GENMASK(14, 11) +#define SNOR_HWCAPS_READ_1_1_8 BIT(11) +#define SNOR_HWCAPS_READ_1_8_8 BIT(12) +#define SNOR_HWCAPS_READ_8_8_8 BIT(13) +#define SNOR_HWCAPS_READ_1_8_8_DTR BIT(14) + /* * Page Program capabilities. * MUST be ordered by priority: the higher bit position, the higher priority. - * Like (Fast) Read capabilities, Quad SPI protocols are preferred to the + * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the * legacy SPI 1-1-1 protocol. * Note that Dual Page Programs are not supported because there is no existing * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory * implements such commands. */ -#define SNOR_HWCAPS_PP_MASK GENMASK(19, 16) +#define SNOR_HWCAPS_PP_MASK GENMASK(22, 16) #define SNOR_HWCAPS_PP BIT(16) #define SNOR_HWCAPS_PP_QUAD GENMASK(19, 17) @@ -345,6 +356,11 @@ struct spi_nor_hwcaps { #define SNOR_HWCAPS_PP_1_4_4 BIT(18) #define SNOR_HWCAPS_PP_4_4_4 BIT(19) +#define SNOR_HWCAPS_PP_OCTO GENMASK(22, 20) +#define SNOR_HWCAPS_PP_1_1_8 BIT(20) +#define SNOR_HWCAPS_PP_1_8_8 BIT(21) +#define SNOR_HWCAPS_PP_8_8_8 BIT(22) + /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure -- cgit v1.2.3 From 15060aba717115dc9f204c02213a7c6bf341163e Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Wed, 10 May 2017 11:39:03 -0700 Subject: iommu/vt-d: Helper function to query if a pasid has any active users A driver would need to know if there are any active references to a a PASID before cleaning up its resources. This function helps check if there are any active users of a PASID before it can perform any recovery on that device. To: Joerg Roedel To: linux-kernel@vger.kernel.org To: David Woodhouse Cc: Jean-Phillipe Brucker Cc: iommu@lists.linux-foundation.org Signed-off-by: CQ Tang Signed-off-by: Ashok Raj Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 3c25794042f9..99bc5b3ae26e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -102,6 +102,21 @@ extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, */ extern int intel_svm_unbind_mm(struct device *dev, int pasid); +/** + * intel_svm_is_pasid_valid() - check if pasid is valid + * @dev: Device for which PASID was allocated + * @pasid: PASID value to be checked + * + * This function checks if the specified pasid is still valid. A + * valid pasid means the backing mm is still having a valid user. + * For kernel callers init_mm is always valid. for other mm, if mm->mm_users + * is non-zero, it is valid. + * + * returns -EINVAL if invalid pasid, 0 if pasid ref count is invalid + * 1 if pasid is valid. + */ +extern int intel_svm_is_pasid_valid(struct device *dev, int pasid); + #else /* CONFIG_INTEL_IOMMU_SVM */ static inline int intel_svm_bind_mm(struct device *dev, int *pasid, @@ -114,6 +129,11 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) { BUG(); } + +static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +{ + return -EINVAL; +} #endif /* CONFIG_INTEL_IOMMU_SVM */ #define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) -- cgit v1.2.3 From 91b9ae48aadd7e634161372b0bc3ffc88a050e8b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 May 2017 22:30:33 +0200 Subject: HID: i2c-hid: move header file out of I2C realm include/linux/i2c is not for client devices. Move the header file to a more appropriate location. Signed-off-by: Wolfram Sang Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/i2c/i2c-hid.h | 42 ----------------------------------- include/linux/platform_data/i2c-hid.h | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 42 deletions(-) delete mode 100644 include/linux/i2c/i2c-hid.h create mode 100644 include/linux/platform_data/i2c-hid.h (limited to 'include') diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h deleted file mode 100644 index 1fb088239d12..000000000000 --- a/include/linux/i2c/i2c-hid.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * HID over I2C protocol implementation - * - * Copyright (c) 2012 Benjamin Tissoires - * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive for - * more details. - */ - -#ifndef __LINUX_I2C_HID_H -#define __LINUX_I2C_HID_H - -#include - -struct regulator; - -/** - * struct i2chid_platform_data - used by hid over i2c implementation. - * @hid_descriptor_address: i2c register where the HID descriptor is stored. - * @supply: regulator for powering on the device. - * @post_power_delay_ms: delay after powering on before device is usable. - * - * Note that it is the responsibility of the platform driver (or the acpi 5.0 - * driver, or the flattened device tree) to setup the irq related to the gpio in - * the struct i2c_board_info. - * The platform driver should also setup the gpio according to the device: - * - * A typical example is the following: - * irq = gpio_to_irq(intr_gpio); - * hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info - * gpio_request(intr_gpio, "elan-irq"); - * s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP); - */ -struct i2c_hid_platform_data { - u16 hid_descriptor_address; - struct regulator *supply; - int post_power_delay_ms; -}; - -#endif /* __LINUX_I2C_HID_H */ diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h new file mode 100644 index 000000000000..1fb088239d12 --- /dev/null +++ b/include/linux/platform_data/i2c-hid.h @@ -0,0 +1,42 @@ +/* + * HID over I2C protocol implementation + * + * Copyright (c) 2012 Benjamin Tissoires + * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +#ifndef __LINUX_I2C_HID_H +#define __LINUX_I2C_HID_H + +#include + +struct regulator; + +/** + * struct i2chid_platform_data - used by hid over i2c implementation. + * @hid_descriptor_address: i2c register where the HID descriptor is stored. + * @supply: regulator for powering on the device. + * @post_power_delay_ms: delay after powering on before device is usable. + * + * Note that it is the responsibility of the platform driver (or the acpi 5.0 + * driver, or the flattened device tree) to setup the irq related to the gpio in + * the struct i2c_board_info. + * The platform driver should also setup the gpio according to the device: + * + * A typical example is the following: + * irq = gpio_to_irq(intr_gpio); + * hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info + * gpio_request(intr_gpio, "elan-irq"); + * s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP); + */ +struct i2c_hid_platform_data { + u16 hid_descriptor_address; + struct regulator *supply; + int post_power_delay_ms; +}; + +#endif /* __LINUX_I2C_HID_H */ -- cgit v1.2.3 From 6e7edabfc6a8ac5dce8c55363a7bb1576fc9348f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 11 May 2017 19:11:11 +0200 Subject: HID: Microsoft Win8 Wireless Radio Controls cleanup Use a better URL for the HUTRR40 Radio HID Usages documentation and use the HID_GD_WIRELESS_RADIO_CTLS define rather then hardcoding a check for 0x0001000c. Fixes: 61df56bef9 ("HID: Add mapping for Microsoft Win8 Wireless Radio Controls extensions") Suggested-by: Benjamin Tissoires Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- include/linux/hid.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 0b29466bbc21..bebbf4893448 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -183,9 +183,8 @@ struct hid_item { #define HID_GD_KEYPAD 0x00010007 #define HID_GD_MULTIAXIS 0x00010008 /* - * Microsoft Win8 Wireless Radio Controls extensions CA, see (checked 09052017): - * https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management - * https://web.archive.org/web/20170509144631/https://docs.microsoft.com/en-us/windows-hardware/drivers/hid/airplane-mode-radio-management + * Microsoft Win8 Wireless Radio Controls extensions CA, see: + * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf */ #define HID_GD_WIRELESS_RADIO_CTLS 0x0001000c #define HID_GD_X 0x00010030 -- cgit v1.2.3 From b9109b0e49b93b0ae663330acb36561b8f4f6905 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:28:38 -0700 Subject: f2fs: remove unnecessary read cases in merged IO flow Merged IO flow doesn't need to care about read IOs. f2fs_submit_merged_bio -> f2fs_submit_merged_write f2fs_submit_merged_bios -> f2fs_submit_merged_writes f2fs_submit_merged_bio_cond -> f2fs_submit_merged_write_cond Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 15da88c5c3a4..5805d92893a8 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -790,7 +790,7 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, TP_CONDITION(page->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, TP_PROTO(struct page *page, struct f2fs_io_info *fio), -- cgit v1.2.3 From a912b54d3aaa011266dc266e3694f782f27233cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 May 2017 11:18:25 -0700 Subject: f2fs: split bio cache Split DATA/NODE type bio cache according to different temperature, so write IOs with the same temperature can be merged in corresponding bio cache as much as possible, otherwise, different temperature write IOs submitting into one bio cache will always cause split of bio. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 5805d92893a8..6f77a2755abb 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -19,6 +19,9 @@ TRACE_DEFINE_ENUM(INMEM_INVALIDATE); TRACE_DEFINE_ENUM(INMEM_REVOKE); TRACE_DEFINE_ENUM(IPU); TRACE_DEFINE_ENUM(OPU); +TRACE_DEFINE_ENUM(HOT); +TRACE_DEFINE_ENUM(WARM); +TRACE_DEFINE_ENUM(COLD); TRACE_DEFINE_ENUM(CURSEG_HOT_DATA); TRACE_DEFINE_ENUM(CURSEG_WARM_DATA); TRACE_DEFINE_ENUM(CURSEG_COLD_DATA); @@ -59,6 +62,12 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) +#define show_block_temp(temp) \ + __print_symbolic(temp, \ + { HOT, "HOT" }, \ + { WARM, "WARM" }, \ + { COLD, "COLD" }) + #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_PREFLUSH | REQ_FUA) #define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) @@ -757,6 +766,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(block_t, new_blkaddr) __field(int, op) __field(int, op_flags) + __field(int, temp) __field(int, type) ), @@ -768,16 +778,18 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; __entry->op_flags = fio->op_flags; + __entry->temp = fio->temp; __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, show_bio_type(__entry->op, __entry->op_flags), + show_block_temp(__entry->temp), show_block_type(__entry->type)) ); -- cgit v1.2.3 From 3e08b2df12983159ea2d9a1aa2b2bc601093b3cb Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:12 +0530 Subject: thermal: cpu_cooling: remove cpufreq_cooling_get_level() There is only one user of cpufreq_cooling_get_level() and that already has pointer to the cpufreq_cdev structure. It can directly call get_level() instead and we can get rid of cpufreq_cooling_get_level(). Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpu_cooling.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index c156f5082758..96c5e4c2f9c8 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -82,7 +82,6 @@ of_cpufreq_power_cooling_register(struct device_node *np, */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) @@ -117,11 +116,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { return; } -static inline -unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) -{ - return THERMAL_CSTATE_INVALID; -} #endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From 4d753aa7b6279e4b7d338947a434689962f430d1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:14 +0530 Subject: thermal: cpu_cooling: use cpufreq_policy to register cooling device The CPU cooling driver uses the cpufreq policy, to get clip_cpus, the frequency table, etc. Most of the callers of CPU cooling driver's registration routines have the cpufreq policy with them, but they only pass the policy->related_cpus cpumask. The __cpufreq_cooling_register() routine then gets the policy by itself and uses it. It would be much better if the callers can pass the policy instead directly. This also fixes a basic design flaw, where the policy can be freed while the CPU cooling driver is still active. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpu_cooling.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 96c5e4c2f9c8..d4292ebc5c8b 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,47 +28,49 @@ #include #include +struct cpufreq_policy; + typedef int (*get_static_t)(cpumask_t *cpumask, int interval, unsigned long voltage, u32 *power); #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. */ struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus); +cpufreq_cooling_register(struct cpufreq_policy *policy); struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. - * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @policy: cpufreq policy. */ #ifdef CONFIG_THERMAL_OF struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus); + struct cpufreq_policy *policy); struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { @@ -84,12 +86,12 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * -cpufreq_cooling_register(const struct cpumask *clip_cpus) +cpufreq_cooling_register(struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * -cpufreq_power_cooling_register(const struct cpumask *clip_cpus, +cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { return NULL; @@ -97,14 +99,14 @@ cpufreq_power_cooling_register(const struct cpumask *clip_cpus, static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * of_cpufreq_power_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus, + struct cpufreq_policy *policy, u32 capacitance, get_static_t plat_static_func) { -- cgit v1.2.3 From 55d852931319d2e3ccde86cd426405231ce6c6ac Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Apr 2017 15:57:15 +0530 Subject: cpufreq: create cpufreq_table_count_valid_entries() We need such a routine at two places already, lets create one. Signed-off-by: Viresh Kumar Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Signed-off-by: Eduardo Valentin --- include/linux/cpufreq.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index a5ce0bbeadb5..eb9abfadaeac 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -862,6 +862,20 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, return -EINVAL; } } + +static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos; + int count = 0; + + if (unlikely(!policy->freq_table)) + return 0; + + cpufreq_for_each_valid_entry(pos, policy->freq_table) + count++; + + return count; +} #else static inline int cpufreq_boost_trigger_state(int state) { -- cgit v1.2.3 From 4a78cc644eed3cf2dae00c3a959910a86c140fd6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 26 May 2017 17:10:15 +0200 Subject: mtd: nand: Make sure drivers not supporting SET/GET_FEATURES return -ENOTSUPP A lot of drivers are providing their own ->cmdfunc(), and most of the time this implementation does not support all possible NAND operations. But since ->cmdfunc() cannot return an error code, the core has no way to know that the operation it requested is not supported. This is a problem we cannot address for all kind of operations with the current design, but we can prevent these silent failures for the GET/SET FEATURES operation by overloading the default ->onfi_{set,get}_features() methods with one returning -ENOTSUPP. Reported-by: Chris Packham Signed-off-by: Boris Brezillon Tested-by: Chris Packham --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7a01d2eb7443..28f7dd9177e9 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1259,6 +1259,11 @@ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); +/* Stub used by drivers that do not support GET/SET FEATURES operations */ +int nand_onfi_get_set_features_notsupp(struct mtd_info *mtd, + struct nand_chip *chip, int addr, + u8 *subfeature_param); + /* Default read_page_raw implementation */ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); -- cgit v1.2.3 From 22c78d1cce104072747023d2ae0351bf3f97d725 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 23 May 2017 12:27:17 +0200 Subject: i2c: break out smbus support into separate file Break out the exported SMBus functions and the emulation layer into a separate file. This also involved splitting up the tracing header into an I2C and an SMBus part. Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/trace/events/i2c.h | 226 +-------------------------------------- include/trace/events/smbus.h | 249 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 251 insertions(+), 224 deletions(-) create mode 100644 include/trace/events/smbus.h (limited to 'include') diff --git a/include/trace/events/i2c.h b/include/trace/events/i2c.h index 4abb8eab34d3..86a401190df9 100644 --- a/include/trace/events/i2c.h +++ b/include/trace/events/i2c.h @@ -1,4 +1,4 @@ -/* I2C and SMBUS message transfer tracepoints +/* I2C message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -18,7 +18,7 @@ #include /* - * drivers/i2c/i2c-core.c + * drivers/i2c/i2c-core-base.c */ extern int i2c_transfer_trace_reg(void); extern void i2c_transfer_trace_unreg(void); @@ -144,228 +144,6 @@ TRACE_EVENT_FN(i2c_result, i2c_transfer_trace_reg, i2c_transfer_trace_unreg); -/* - * i2c_smbus_xfer() write data or procedure call request - */ -TRACE_EVENT_CONDITION(smbus_write, - TP_PROTO(const struct i2c_adapter *adap, - u16 addr, unsigned short flags, - char read_write, u8 command, int protocol, - const union i2c_smbus_data *data), - TP_ARGS(adap, addr, flags, read_write, command, protocol, data), - TP_CONDITION(read_write == I2C_SMBUS_WRITE || - protocol == I2C_SMBUS_PROC_CALL || - protocol == I2C_SMBUS_BLOCK_PROC_CALL), - TP_STRUCT__entry( - __field(int, adapter_nr ) - __field(__u16, addr ) - __field(__u16, flags ) - __field(__u8, command ) - __field(__u8, len ) - __field(__u32, protocol ) - __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), - TP_fast_assign( - __entry->adapter_nr = adap->nr; - __entry->addr = addr; - __entry->flags = flags; - __entry->command = command; - __entry->protocol = protocol; - - switch (protocol) { - case I2C_SMBUS_BYTE_DATA: - __entry->len = 1; - goto copy; - case I2C_SMBUS_WORD_DATA: - case I2C_SMBUS_PROC_CALL: - __entry->len = 2; - goto copy; - case I2C_SMBUS_BLOCK_DATA: - case I2C_SMBUS_BLOCK_PROC_CALL: - case I2C_SMBUS_I2C_BLOCK_DATA: - __entry->len = data->block[0] + 1; - copy: - memcpy(__entry->buf, data->block, __entry->len); - break; - case I2C_SMBUS_QUICK: - case I2C_SMBUS_BYTE: - case I2C_SMBUS_I2C_BLOCK_BROKEN: - default: - __entry->len = 0; - } - ), - TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", - __entry->adapter_nr, - __entry->addr, - __entry->flags, - __entry->command, - __print_symbolic(__entry->protocol, - { I2C_SMBUS_QUICK, "QUICK" }, - { I2C_SMBUS_BYTE, "BYTE" }, - { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, - { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, - { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, - { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, - { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, - { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, - { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), - __entry->len, - __entry->len, __entry->buf - )); - -/* - * i2c_smbus_xfer() read data request - */ -TRACE_EVENT_CONDITION(smbus_read, - TP_PROTO(const struct i2c_adapter *adap, - u16 addr, unsigned short flags, - char read_write, u8 command, int protocol), - TP_ARGS(adap, addr, flags, read_write, command, protocol), - TP_CONDITION(!(read_write == I2C_SMBUS_WRITE || - protocol == I2C_SMBUS_PROC_CALL || - protocol == I2C_SMBUS_BLOCK_PROC_CALL)), - TP_STRUCT__entry( - __field(int, adapter_nr ) - __field(__u16, flags ) - __field(__u16, addr ) - __field(__u8, command ) - __field(__u32, protocol ) - __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), - TP_fast_assign( - __entry->adapter_nr = adap->nr; - __entry->addr = addr; - __entry->flags = flags; - __entry->command = command; - __entry->protocol = protocol; - ), - TP_printk("i2c-%d a=%03x f=%04x c=%x %s", - __entry->adapter_nr, - __entry->addr, - __entry->flags, - __entry->command, - __print_symbolic(__entry->protocol, - { I2C_SMBUS_QUICK, "QUICK" }, - { I2C_SMBUS_BYTE, "BYTE" }, - { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, - { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, - { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, - { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, - { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, - { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, - { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }) - )); - -/* - * i2c_smbus_xfer() read data or procedure call reply - */ -TRACE_EVENT_CONDITION(smbus_reply, - TP_PROTO(const struct i2c_adapter *adap, - u16 addr, unsigned short flags, - char read_write, u8 command, int protocol, - const union i2c_smbus_data *data), - TP_ARGS(adap, addr, flags, read_write, command, protocol, data), - TP_CONDITION(read_write == I2C_SMBUS_READ), - TP_STRUCT__entry( - __field(int, adapter_nr ) - __field(__u16, addr ) - __field(__u16, flags ) - __field(__u8, command ) - __field(__u8, len ) - __field(__u32, protocol ) - __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), - TP_fast_assign( - __entry->adapter_nr = adap->nr; - __entry->addr = addr; - __entry->flags = flags; - __entry->command = command; - __entry->protocol = protocol; - - switch (protocol) { - case I2C_SMBUS_BYTE: - case I2C_SMBUS_BYTE_DATA: - __entry->len = 1; - goto copy; - case I2C_SMBUS_WORD_DATA: - case I2C_SMBUS_PROC_CALL: - __entry->len = 2; - goto copy; - case I2C_SMBUS_BLOCK_DATA: - case I2C_SMBUS_BLOCK_PROC_CALL: - case I2C_SMBUS_I2C_BLOCK_DATA: - __entry->len = data->block[0] + 1; - copy: - memcpy(__entry->buf, data->block, __entry->len); - break; - case I2C_SMBUS_QUICK: - case I2C_SMBUS_I2C_BLOCK_BROKEN: - default: - __entry->len = 0; - } - ), - TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", - __entry->adapter_nr, - __entry->addr, - __entry->flags, - __entry->command, - __print_symbolic(__entry->protocol, - { I2C_SMBUS_QUICK, "QUICK" }, - { I2C_SMBUS_BYTE, "BYTE" }, - { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, - { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, - { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, - { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, - { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, - { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, - { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), - __entry->len, - __entry->len, __entry->buf - )); - -/* - * i2c_smbus_xfer() result - */ -TRACE_EVENT(smbus_result, - TP_PROTO(const struct i2c_adapter *adap, - u16 addr, unsigned short flags, - char read_write, u8 command, int protocol, - int res), - TP_ARGS(adap, addr, flags, read_write, command, protocol, res), - TP_STRUCT__entry( - __field(int, adapter_nr ) - __field(__u16, addr ) - __field(__u16, flags ) - __field(__u8, read_write ) - __field(__u8, command ) - __field(__s16, res ) - __field(__u32, protocol ) - ), - TP_fast_assign( - __entry->adapter_nr = adap->nr; - __entry->addr = addr; - __entry->flags = flags; - __entry->read_write = read_write; - __entry->command = command; - __entry->protocol = protocol; - __entry->res = res; - ), - TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d", - __entry->adapter_nr, - __entry->addr, - __entry->flags, - __entry->command, - __print_symbolic(__entry->protocol, - { I2C_SMBUS_QUICK, "QUICK" }, - { I2C_SMBUS_BYTE, "BYTE" }, - { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, - { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, - { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, - { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, - { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, - { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, - { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), - __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd", - __entry->res - )); - #endif /* _TRACE_I2C_H */ /* This part must be outside protection */ diff --git a/include/trace/events/smbus.h b/include/trace/events/smbus.h new file mode 100644 index 000000000000..d2fb6e1d3e10 --- /dev/null +++ b/include/trace/events/smbus.h @@ -0,0 +1,249 @@ +/* SMBUS message transfer tracepoints + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM smbus + +#if !defined(_TRACE_SMBUS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SMBUS_H + +#include +#include + +/* + * drivers/i2c/i2c-core-smbus.c + */ + +/* + * i2c_smbus_xfer() write data or procedure call request + */ +TRACE_EVENT_CONDITION(smbus_write, + TP_PROTO(const struct i2c_adapter *adap, + u16 addr, unsigned short flags, + char read_write, u8 command, int protocol, + const union i2c_smbus_data *data), + TP_ARGS(adap, addr, flags, read_write, command, protocol, data), + TP_CONDITION(read_write == I2C_SMBUS_WRITE || + protocol == I2C_SMBUS_PROC_CALL || + protocol == I2C_SMBUS_BLOCK_PROC_CALL), + TP_STRUCT__entry( + __field(int, adapter_nr ) + __field(__u16, addr ) + __field(__u16, flags ) + __field(__u8, command ) + __field(__u8, len ) + __field(__u32, protocol ) + __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), + TP_fast_assign( + __entry->adapter_nr = adap->nr; + __entry->addr = addr; + __entry->flags = flags; + __entry->command = command; + __entry->protocol = protocol; + + switch (protocol) { + case I2C_SMBUS_BYTE_DATA: + __entry->len = 1; + goto copy; + case I2C_SMBUS_WORD_DATA: + case I2C_SMBUS_PROC_CALL: + __entry->len = 2; + goto copy; + case I2C_SMBUS_BLOCK_DATA: + case I2C_SMBUS_BLOCK_PROC_CALL: + case I2C_SMBUS_I2C_BLOCK_DATA: + __entry->len = data->block[0] + 1; + copy: + memcpy(__entry->buf, data->block, __entry->len); + break; + case I2C_SMBUS_QUICK: + case I2C_SMBUS_BYTE: + case I2C_SMBUS_I2C_BLOCK_BROKEN: + default: + __entry->len = 0; + } + ), + TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", + __entry->adapter_nr, + __entry->addr, + __entry->flags, + __entry->command, + __print_symbolic(__entry->protocol, + { I2C_SMBUS_QUICK, "QUICK" }, + { I2C_SMBUS_BYTE, "BYTE" }, + { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, + { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, + { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, + { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, + { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, + { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, + { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), + __entry->len, + __entry->len, __entry->buf + )); + +/* + * i2c_smbus_xfer() read data request + */ +TRACE_EVENT_CONDITION(smbus_read, + TP_PROTO(const struct i2c_adapter *adap, + u16 addr, unsigned short flags, + char read_write, u8 command, int protocol), + TP_ARGS(adap, addr, flags, read_write, command, protocol), + TP_CONDITION(!(read_write == I2C_SMBUS_WRITE || + protocol == I2C_SMBUS_PROC_CALL || + protocol == I2C_SMBUS_BLOCK_PROC_CALL)), + TP_STRUCT__entry( + __field(int, adapter_nr ) + __field(__u16, flags ) + __field(__u16, addr ) + __field(__u8, command ) + __field(__u32, protocol ) + __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), + TP_fast_assign( + __entry->adapter_nr = adap->nr; + __entry->addr = addr; + __entry->flags = flags; + __entry->command = command; + __entry->protocol = protocol; + ), + TP_printk("i2c-%d a=%03x f=%04x c=%x %s", + __entry->adapter_nr, + __entry->addr, + __entry->flags, + __entry->command, + __print_symbolic(__entry->protocol, + { I2C_SMBUS_QUICK, "QUICK" }, + { I2C_SMBUS_BYTE, "BYTE" }, + { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, + { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, + { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, + { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, + { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, + { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, + { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }) + )); + +/* + * i2c_smbus_xfer() read data or procedure call reply + */ +TRACE_EVENT_CONDITION(smbus_reply, + TP_PROTO(const struct i2c_adapter *adap, + u16 addr, unsigned short flags, + char read_write, u8 command, int protocol, + const union i2c_smbus_data *data), + TP_ARGS(adap, addr, flags, read_write, command, protocol, data), + TP_CONDITION(read_write == I2C_SMBUS_READ), + TP_STRUCT__entry( + __field(int, adapter_nr ) + __field(__u16, addr ) + __field(__u16, flags ) + __field(__u8, command ) + __field(__u8, len ) + __field(__u32, protocol ) + __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), + TP_fast_assign( + __entry->adapter_nr = adap->nr; + __entry->addr = addr; + __entry->flags = flags; + __entry->command = command; + __entry->protocol = protocol; + + switch (protocol) { + case I2C_SMBUS_BYTE: + case I2C_SMBUS_BYTE_DATA: + __entry->len = 1; + goto copy; + case I2C_SMBUS_WORD_DATA: + case I2C_SMBUS_PROC_CALL: + __entry->len = 2; + goto copy; + case I2C_SMBUS_BLOCK_DATA: + case I2C_SMBUS_BLOCK_PROC_CALL: + case I2C_SMBUS_I2C_BLOCK_DATA: + __entry->len = data->block[0] + 1; + copy: + memcpy(__entry->buf, data->block, __entry->len); + break; + case I2C_SMBUS_QUICK: + case I2C_SMBUS_I2C_BLOCK_BROKEN: + default: + __entry->len = 0; + } + ), + TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", + __entry->adapter_nr, + __entry->addr, + __entry->flags, + __entry->command, + __print_symbolic(__entry->protocol, + { I2C_SMBUS_QUICK, "QUICK" }, + { I2C_SMBUS_BYTE, "BYTE" }, + { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, + { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, + { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, + { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, + { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, + { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, + { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), + __entry->len, + __entry->len, __entry->buf + )); + +/* + * i2c_smbus_xfer() result + */ +TRACE_EVENT(smbus_result, + TP_PROTO(const struct i2c_adapter *adap, + u16 addr, unsigned short flags, + char read_write, u8 command, int protocol, + int res), + TP_ARGS(adap, addr, flags, read_write, command, protocol, res), + TP_STRUCT__entry( + __field(int, adapter_nr ) + __field(__u16, addr ) + __field(__u16, flags ) + __field(__u8, read_write ) + __field(__u8, command ) + __field(__s16, res ) + __field(__u32, protocol ) + ), + TP_fast_assign( + __entry->adapter_nr = adap->nr; + __entry->addr = addr; + __entry->flags = flags; + __entry->read_write = read_write; + __entry->command = command; + __entry->protocol = protocol; + __entry->res = res; + ), + TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d", + __entry->adapter_nr, + __entry->addr, + __entry->flags, + __entry->command, + __print_symbolic(__entry->protocol, + { I2C_SMBUS_QUICK, "QUICK" }, + { I2C_SMBUS_BYTE, "BYTE" }, + { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, + { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, + { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, + { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, + { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, + { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, + { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), + __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd", + __entry->res + )); + +#endif /* _TRACE_SMBUS_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 9748e1d87573c94191442d6bd0307f523e5cd8b8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sat, 29 Apr 2017 11:06:45 +0200 Subject: mtd: nand: add support for Micron on-die ECC Now that the core NAND subsystem has support for on-die ECC, this commit brings the necessary code to support on-die ECC on Micron NANDs. In micron_nand_init(), we detect if the Micron NAND chip supports on-die ECC mode, by checking a number of conditions: - It must be an ONFI NAND - It must be a SLC NAND - Enabling *and* disabling on-die ECC must work - The on-die ECC must be correcting 4 bits per 512 bytes of data. Some Micron NAND chips have an on-die ECC able to correct 8 bits per 512 bytes of data, but they work slightly differently and therefore we don't support them in this patch. Then, if the on-die ECC cannot be disabled (some Micron NAND have on-die ECC forcefully enabled), we bail out, as we don't support such NANDs. Indeed, the implementation of raw_read()/raw_write() make the assumption that on-die ECC can be disabled. Support for Micron NANDs with on-die ECC forcefully enabled can easily be added, but in the absence of such HW for testing, we preferred to simply bail out. If the on-die ECC is supported, and requested in the Device Tree, then it is indeed enabled, by using custom implementations of the ->read_page(), ->read_page_raw(), ->write_page() and ->write_page_raw() operation to properly handle the on-die ECC. In the non-raw functions, we need to enable the internal ECC engine before issuing the NAND_CMD_READ0 or NAND_CMD_SEQIN commands, which is why we set the NAND_ECC_CUSTOM_PAGE_ACCESS option at initialization time (it asks the NAND core to let the NAND driver issue those commands). Signed-off-by: Thomas Petazzoni Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 28f7dd9177e9..893d0ce08030 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -258,6 +258,8 @@ struct nand_chip; /* Vendor-specific feature address (Micron) */ #define ONFI_FEATURE_ADDR_READ_RETRY 0x89 +#define ONFI_FEATURE_ON_DIE_ECC 0x90 +#define ONFI_FEATURE_ON_DIE_ECC_EN BIT(3) /* ONFI subfeature parameters length */ #define ONFI_SUBFEATURE_PARAM_LEN 4 -- cgit v1.2.3 From 104e442a67cfba4d0cc982384761befb917fb6a1 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 16 Mar 2017 09:35:58 +0100 Subject: mtd: nand: Pass the CS line to ->setup_data_interface() Some NAND controllers can assign different NAND timings to different CS lines. Pass the CS line information to ->setup_data_interface() so that the NAND controller driver knows which CS line is concerned by the setup_data_interface() request. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 893d0ce08030..9de3686e738c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -107,6 +107,8 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); #define NAND_STATUS_READY 0x40 #define NAND_STATUS_WP 0x80 +#define NAND_DATA_IFACE_CHECK_ONLY -1 + /* * Constants for ECC_MODES */ @@ -818,7 +820,10 @@ struct nand_manufacturer_ops { * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand - * @setup_data_interface: [OPTIONAL] setup the data interface and timing + * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If + * chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this + * means the configuration should not be applied but + * only checked. * @bbt: [INTERN] bad block table pointer * @bbt_td: [REPLACEABLE] bad block table descriptor for flash * lookup. @@ -862,9 +867,8 @@ struct nand_chip { int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode); - int (*setup_data_interface)(struct mtd_info *mtd, - const struct nand_data_interface *conf, - bool check_only); + int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, + const struct nand_data_interface *conf); int chip_delay; -- cgit v1.2.3 From 7d135bcced20be2b50128432c5426a7278ec4f6d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 6 May 2017 18:03:33 +0200 Subject: mtd: nand: Drop the ->errstat() hook The ->errstat() hook is no longer implemented NAND controller drivers. Get rid of it before someone starts abusing it. Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9de3686e738c..8b3607bde1b5 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -834,9 +834,6 @@ struct nand_manufacturer_ops { * structure which is shared among multiple independent * devices. * @priv: [OPTIONAL] pointer to private chip data - * @errstat: [OPTIONAL] hardware specific function to perform - * additional error status checks (determine if errors are - * correctable). * @manufacturer: [INTERN] Contains manufacturer information */ @@ -860,8 +857,6 @@ struct nand_chip { int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); int (*erase)(struct mtd_info *mtd, int page); int (*scan_bbt)(struct mtd_info *mtd); - int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, - int status, int page); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From d2f31c49cf7cfe8f02b70614ae56a39b0c1d8a75 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 May 2017 23:11:39 +0200 Subject: i2c: sh_mobile: remove platform_data No platform currently upstream makes use of this platform_data anymore. The ones that did are converted to DT meanwhile. So, remove it. The old platforms likely don't have the 'clks_per_cnt' feature, otherwise it would have been implemented by now. And in the unlikely case they need to setup a different bus speed, we should rather go for a generic i2c platform data just for that. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- include/linux/i2c/i2c-sh_mobile.h | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 include/linux/i2c/i2c-sh_mobile.h (limited to 'include') diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h deleted file mode 100644 index 06e3089795fb..000000000000 --- a/include/linux/i2c/i2c-sh_mobile.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __I2C_SH_MOBILE_H__ -#define __I2C_SH_MOBILE_H__ - -#include - -struct i2c_sh_mobile_platform_data { - unsigned long bus_speed; - unsigned int clks_per_count; -}; - -#endif /* __I2C_SH_MOBILE_H__ */ -- cgit v1.2.3 From 5c82a6ae0242416cfead597bb2b42aa3481a0ba7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 2 Jun 2017 14:15:15 +0200 Subject: rtc: remove rtc_device.name rtc->name is only used in messages were it is superfluous. Remove it completely from the structure. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b693adac853b..d354f56e0cf5 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -116,7 +116,6 @@ struct rtc_device { struct module *owner; int id; - char name[RTC_DEVICE_NAME_SIZE]; const struct rtc_class_ops *ops; struct mutex ops_lock; -- cgit v1.2.3 From aaac082dac0a8ac6b00509c7ae2fa8280f966652 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Jun 2017 23:59:34 -0700 Subject: HID: serialize hid_hw_open and hid_hw_close The HID transport drivers either re-implement exactly the same logic (usbhid, i2c-hid) or forget to implement it (usbhid) which causes issues when the same device is accessed via multiple interfaces (for example input device through evdev and also hidraw). Let's muve the locking logic into HID core to make sure the serialized behavior is always enforced. Also let's uninline and move hid_hw_start() and hid_hw_stop() into hid-core as hid_hw_start() is somewhat large and do not believe we get any benefit from these two being inline. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 72 +++++++---------------------------------------------- 1 file changed, 9 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5be325d890d9..5501eb64dbc4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -520,7 +521,10 @@ struct hid_device { /* device report descriptor */ struct semaphore driver_input_lock; /* protects the current driver */ struct device dev; /* device */ struct hid_driver *driver; + struct hid_ll_driver *ll_driver; + struct mutex ll_open_lock; + unsigned int ll_open_count; #ifdef CONFIG_HID_BATTERY_STRENGTH /* @@ -937,69 +941,11 @@ static inline int __must_check hid_parse(struct hid_device *hdev) return hid_open_report(hdev); } -/** - * hid_hw_start - start underlaying HW - * - * @hdev: hid device - * @connect_mask: which outputs to connect, see HID_CONNECT_* - * - * Call this in probe function *after* hid_parse. This will setup HW buffers - * and start the device (if not deffered to device open). hid_hw_stop must be - * called if this was successful. - */ -static inline int __must_check hid_hw_start(struct hid_device *hdev, - unsigned int connect_mask) -{ - int ret = hdev->ll_driver->start(hdev); - if (ret || !connect_mask) - return ret; - ret = hid_connect(hdev, connect_mask); - if (ret) - hdev->ll_driver->stop(hdev); - return ret; -} - -/** - * hid_hw_stop - stop underlaying HW - * - * @hdev: hid device - * - * This is usually called from remove function or from probe when something - * failed and hid_hw_start was called already. - */ -static inline void hid_hw_stop(struct hid_device *hdev) -{ - hid_disconnect(hdev); - hdev->ll_driver->stop(hdev); -} - -/** - * hid_hw_open - signal underlaying HW to start delivering events - * - * @hdev: hid device - * - * Tell underlying HW to start delivering events from the device. - * This function should be called sometime after successful call - * to hid_hiw_start(). - */ -static inline int __must_check hid_hw_open(struct hid_device *hdev) -{ - return hdev->ll_driver->open(hdev); -} - -/** - * hid_hw_close - signal underlaying HW to stop delivering events - * - * @hdev: hid device - * - * This function indicates that we are not interested in the events - * from this device anymore. Delivery of events may or may not stop, - * depending on the number of users still outstanding. - */ -static inline void hid_hw_close(struct hid_device *hdev) -{ - hdev->ll_driver->close(hdev); -} +int __must_check hid_hw_start(struct hid_device *hdev, + unsigned int connect_mask); +void hid_hw_stop(struct hid_device *hdev); +int __must_check hid_hw_open(struct hid_device *hdev); +void hid_hw_close(struct hid_device *hdev); /** * hid_hw_power - requests underlying HW to go into given power mode -- cgit v1.2.3 From 283a21da1239d8db7fdf6d9077feed73a6efffa2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 6 Jun 2017 23:59:38 -0700 Subject: HID: remove no longer used hid->open field Now that all users have migrated to use hid->ll_open_count, we can remove hid->open field. Signed-off-by: Dmitry Torokhov Reviewed-by: Andy Shevchenko Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 5501eb64dbc4..72e8ac667771 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -548,7 +548,6 @@ struct hid_device { /* device report descriptor */ void *hiddev; /* The hiddev structure */ void *hidraw; - int open; /* is the device open by anyone? */ char name[128]; /* Device name */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ -- cgit v1.2.3 From c17cd24959cdb12c855dc61e20c36fa25f21f3d3 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 1 Jun 2017 03:10:53 -0700 Subject: target/configfs: Kill se_device->dev_link_magic Instead of using a hardcoded magic value in se_device when verifying a target config_item symlink source during target_fabric_port_link(), go ahead and use target_core_dev_item_ops directly instead. Reviewed-by: Christoph Hellwig Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 0c1dce2ac6f0..c3c14d053122 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -746,8 +746,6 @@ struct se_dev_stat_grps { }; struct se_device { -#define SE_DEV_LINK_MAGIC 0xfeeddeef - u32 dev_link_magic; /* RELATIVE TARGET PORT IDENTIFER Counter */ u16 dev_rpti_counter; /* Used for SAM Task Attribute ordering */ -- cgit v1.2.3 From 9ae0e9ade56f23765366d2cfad24e65f28df977d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 1 Jun 2017 03:11:18 -0700 Subject: target/configfs: Kill se_lun->lun_link_magic Instead of using a hardcoded magic value in se_lun when verifying a target config_item symlink source during target_fabric_mappedlun_link(), go ahead and use target_fabric_port_item_ops directly instead. Reviewed-by: Christoph Hellwig Cc: Mike Christie Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c3c14d053122..47d9f381209f 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -701,8 +701,6 @@ struct scsi_port_stats { struct se_lun { u64 unpacked_lun; -#define SE_LUN_LINK_MAGIC 0xffff7771 - u32 lun_link_magic; bool lun_shutdown; bool lun_access_ro; u32 lun_index; -- cgit v1.2.3 From 2c8f8afa7f92acb07641bf95b940d384ed1d0294 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 Jun 2017 20:52:10 +0900 Subject: mtd: nand: add generic helpers to check, match, maximize ECC settings Driver are responsible for setting up ECC parameters correctly. Those include: - Check if ECC parameters specified (usually by DT) are valid - Meet the chip's ECC requirement - Maximize ECC strength if NAND_ECC_MAXIMIZE flag is set The logic can be generalized by factoring out common code. This commit adds 3 helpers to the NAND framework: nand_check_ecc_caps - Check if preset step_size and strength are valid nand_match_ecc_req - Match the chip's requirement nand_maximize_ecc - Maximize the ECC strength To use the helpers above, a driver needs to provide: - Data array of supported ECC step size and strength - A hook that calculates ECC bytes from the combination of step_size and strength. By using those helpers, code duplication among drivers will be reduced. Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 8b3607bde1b5..568f53e812cd 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -481,6 +481,30 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc) init_waitqueue_head(&nfc->wq); } +/** + * struct nand_ecc_step_info - ECC step information of ECC engine + * @stepsize: data bytes per ECC step + * @strengths: array of supported strengths + * @nstrengths: number of supported strengths + */ +struct nand_ecc_step_info { + int stepsize; + const int *strengths; + int nstrengths; +}; + +/** + * struct nand_ecc_caps - capability of ECC engine + * @stepinfos: array of ECC step information + * @nstepinfos: number of ECC step information + * @calc_ecc_bytes: driver's hook to calculate ECC bytes per step + */ +struct nand_ecc_caps { + const struct nand_ecc_step_info *stepinfos; + int nstepinfos; + int (*calc_ecc_bytes)(int step_size, int strength); +}; + /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode @@ -1246,6 +1270,15 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *extraoob, int extraooblen, int threshold); +int nand_check_ecc_caps(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_match_ecc_req(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + +int nand_maximize_ecc(struct nand_chip *chip, + const struct nand_ecc_caps *caps, int oobavail); + /* Default write_oob implementation */ int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page); -- cgit v1.2.3 From a03c60178c181767ecfb26fb311a88742d228118 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 Jun 2017 20:52:11 +0900 Subject: mtd: nand: add a shorthand to generate nand_ecc_caps structure struct nand_ecc_caps was designed as flexible as possible to support multiple stepsizes (like sunxi_nand.c). So, we need to write multiple arrays even for the simplest case. I guess many controllers support a single stepsize, so here is a shorthand macro for the case. It allows to describe like ... NAND_ECC_CAPS_SINGLE(denali_pci_ecc_caps, denali_calc_ecc_bytes, 512, 8, 15); ... instead of static const int denali_pci_ecc_strengths[] = {8, 15}; static const struct nand_ecc_step_info denali_pci_ecc_stepinfo = { .stepsize = 512, .strengths = denali_pci_ecc_strengths, .nstrengths = ARRAY_SIZE(denali_pci_ecc_strengths), }; static const struct nand_ecc_caps denali_pci_ecc_caps = { .stepinfos = &denali_pci_ecc_stepinfo, .nstepinfos = 1, .calc_ecc_bytes = denali_calc_ecc_bytes, }; Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 568f53e812cd..dc8fbc033442 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -505,6 +505,20 @@ struct nand_ecc_caps { int (*calc_ecc_bytes)(int step_size, int strength); }; +/* a shorthand to generate struct nand_ecc_caps with only one ECC stepsize */ +#define NAND_ECC_CAPS_SINGLE(__name, __calc, __step, ...) \ +static const int __name##_strengths[] = { __VA_ARGS__ }; \ +static const struct nand_ecc_step_info __name##_stepinfo = { \ + .stepsize = __step, \ + .strengths = __name##_strengths, \ + .nstrengths = ARRAY_SIZE(__name##_strengths), \ +}; \ +static const struct nand_ecc_caps __name = { \ + .stepinfos = &__name##_stepinfo, \ + .nstepinfos = 1, \ + .calc_ecc_bytes = __calc, \ +} + /** * struct nand_ecc_ctrl - Control structure for ECC * @mode: ECC mode -- cgit v1.2.3 From 68c35ea25bdd4ad10445c4c02f7d48b3dccab8cc Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: Add helper for event notifier. Add cros_ec_get_event() entry point to retrieve event within functions called by the notifier. Signed-off-by: Gwendal Grignou Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 28baee63eaf6..b61b2e013698 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -300,6 +300,16 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); */ int cros_ec_get_next_event(struct cros_ec_device *ec_dev); +/** + * cros_ec_get_host_event - Return a mask of event set by the EC. + * + * When MKBP is supported, when the EC raises an interrupt, + * We collect the events raised and call the functions in the ec notifier. + * + * This function is a helper to know which events are raised. + */ +u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); + /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; -- cgit v1.2.3 From 0aa877c558477e5c4b0faaa618cfd41f8c0b3319 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: Add EC console read structures definitions ec_params_console_read_v1 is used to capture EC logs from kernel, and ec_params_get_cmd_versions_v1 is used to probe whether EC supports that command. Signed-off-by: Nicolas Boichat Reviewed-by: Guenter Roeck Acked-by: Lee Jones Tested-by: Enric Balletbo i Serra Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index c93e7e0300ef..1b19e424e1cf 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -625,6 +625,10 @@ struct ec_params_get_cmd_versions { uint8_t cmd; /* Command to check */ } __packed; +struct ec_params_get_cmd_versions_v1 { + uint16_t cmd; /* Command to check */ +} __packed; + struct ec_response_get_cmd_versions { /* * Mask of supported versions; use EC_VER_MASK() to compare with a @@ -2285,13 +2289,28 @@ struct ec_params_charge_control { #define EC_CMD_CONSOLE_SNAPSHOT 0x97 /* - * Read next chunk of data from saved snapshot. + * Read data from the saved snapshot. If the subcmd parameter is + * CONSOLE_READ_NEXT, this will return data starting from the beginning of + * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the + * end of the previous snapshot. + * + * The params are only looked at in version >= 1 of this command. Prior + * versions will just default to CONSOLE_READ_NEXT behavior. * * Response is null-terminated string. Empty string, if there is no more * remaining output. */ #define EC_CMD_CONSOLE_READ 0x98 +enum ec_console_read_subcmd { + CONSOLE_READ_NEXT = 0, + CONSOLE_READ_RECENT +}; + +struct ec_params_console_read_v1 { + uint8_t subcmd; /* enum ec_console_read_subcmd */ +} __packed; + /*****************************************************************************/ /* -- cgit v1.2.3 From e86264595225d2764a903965356ef59aeb7d1c47 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: mfd: cros_ec: add debugfs, console log file If the EC supports the new CONSOLE_READ command type, then we place a console_log file in debugfs for that EC device which allows us to grab EC logs. The kernel will poll every 10 seconds for the log and keep its own buffer, but userspace should grab this and write it out to some logs which actually get rotated. Signed-off-by: Eric Caruso Signed-off-by: Nicolas Boichat Acked-by: Lee Jones Tested-by: Enric Balletbo i Serra [bleung: restored original version of this commit, with pointer size issue to be fixed in next commit] Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index b61b2e013698..3b16c9009749 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -172,6 +172,8 @@ struct cros_ec_platform { u16 cmd_offset; }; +struct cros_ec_debugfs; + /* * struct cros_ec_dev - ChromeOS EC device entry point * @@ -179,6 +181,7 @@ struct cros_ec_platform { * @cdev: Character device structure in /dev * @ec_dev: cros_ec_device structure to talk to the physical device * @dev: pointer to the platform device + * @debug_info: cros_ec_debugfs structure for debugging information * @cmd_offset: offset to apply for each command. */ struct cros_ec_dev { @@ -186,6 +189,7 @@ struct cros_ec_dev { struct cdev cdev; struct cros_ec_device *ec_dev; struct device *dev; + struct cros_ec_debugfs *debug_info; u16 cmd_offset; u32 features[2]; }; -- cgit v1.2.3 From e297a783e41560b44e3c14f38e420cba518113b8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 7 Jun 2017 19:58:56 -0400 Subject: random: add wait_for_random_bytes() API This enables users of get_random_{bytes,u32,u64,int,long} to wait until the pool is ready before using this function, in case they actually want to have reliable randomness. Signed-off-by: Jason A. Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index ed5c3838780d..e29929347c95 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -34,6 +34,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); +extern int wait_for_random_bytes(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -- cgit v1.2.3 From da9ba564bd683374b8d319756f312821b8265b06 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 7 Jun 2017 20:05:02 -0400 Subject: random: add get_random_{bytes,u32,u64,int,long,once}_wait family These functions are simple convenience wrappers that call wait_for_random_bytes before calling the respective get_random_* function. Signed-off-by: Jason A. Donenfeld Signed-off-by: Theodore Ts'o --- include/linux/net.h | 2 ++ include/linux/once.h | 2 ++ include/linux/random.h | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index abcfa46a2bd9..dda2cc939a53 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -274,6 +274,8 @@ do { \ #define net_get_random_once(buf, nbytes) \ get_random_once((buf), (nbytes)) +#define net_get_random_once_wait(buf, nbytes) \ + get_random_once_wait((buf), (nbytes)) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/once.h b/include/linux/once.h index 285f12cb40e6..9c98aaa87cbc 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -53,5 +53,7 @@ void __do_once_done(bool *done, struct static_key *once_key, #define get_random_once(buf, nbytes) \ DO_ONCE(get_random_bytes, (buf), (nbytes)) +#define get_random_once_wait(buf, nbytes) \ + DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \ #endif /* _LINUX_ONCE_H */ diff --git a/include/linux/random.h b/include/linux/random.h index e29929347c95..4aecc339558d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -58,6 +58,31 @@ static inline unsigned long get_random_long(void) #endif } +/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). + * Returns the result of the call to wait_for_random_bytes. */ +static inline int get_random_bytes_wait(void *buf, int nbytes) +{ + int ret = wait_for_random_bytes(); + if (unlikely(ret)) + return ret; + get_random_bytes(buf, nbytes); + return 0; +} + +#define declare_get_random_var_wait(var) \ + static inline int get_random_ ## var ## _wait(var *out) { \ + int ret = wait_for_random_bytes(); \ + if (unlikely(ret)) \ + return ret; \ + *out = get_random_ ## var(); \ + return 0; \ + } +declare_get_random_var_wait(u32) +declare_get_random_var_wait(u64) +declare_get_random_var_wait(int) +declare_get_random_var_wait(long) +#undef declare_get_random_var + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From 3708184afc77bb67709a67a35d9f367ebd32cbc4 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:37 +0300 Subject: device property: Move FW type specific functionality to FW specific files The device and fwnode property API supports Devicetree, ACPI and pset properties. The implementation of this functionality for each firmware type was embedded in the fwnode property core. Move it out to firmware type specific locations, making it easier to maintain. Depends-on: ("of: Move OF property and graph API from base.c to property.c") Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ++++ include/linux/fwnode.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 2 ++ 3 files changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 137e4a3d89c5..b8f23c521b67 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -56,6 +56,9 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) + +extern const struct fwnode_operations acpi_fwnode_ops; + static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) { struct fwnode_handle *fwnode; @@ -65,6 +68,7 @@ static inline struct fwnode_handle *acpi_alloc_fwnode_static(void) return NULL; fwnode->type = FWNODE_ACPI_STATIC; + fwnode->ops = &acpi_fwnode_ops; return fwnode; } diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 3dff2398a5f0..8f64b3ae9c57 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -12,6 +12,8 @@ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ +#include + enum fwnode_type { FWNODE_INVALID = 0, FWNODE_OF, @@ -22,9 +24,12 @@ enum fwnode_type { FWNODE_IRQCHIP }; +struct fwnode_operations; + struct fwnode_handle { enum fwnode_type type; struct fwnode_handle *secondary; + const struct fwnode_operations *ops; }; /** @@ -39,4 +44,53 @@ struct fwnode_endpoint { const struct fwnode_handle *local_fwnode; }; +/** + * struct fwnode_operations - Operations for fwnode interface + * @get: Get a reference to an fwnode. + * @put: Put a reference to an fwnode. + * @property_present: Return true if a property is present. + * @property_read_integer_array: Read an array of integer properties. Return + * zero on success, a negative error code + * otherwise. + * @property_read_string_array: Read an array of string properties. Return zero + * on success, a negative error code otherwise. + * @get_parent: Return the parent of an fwnode. + * @get_next_child_node: Return the next child node in an iteration. + * @get_named_child_node: Return a child node with a given name. + */ +struct fwnode_operations { + void (*get)(struct fwnode_handle *fwnode); + void (*put)(struct fwnode_handle *fwnode); + bool (*property_present)(struct fwnode_handle *fwnode, + const char *propname); + int (*property_read_int_array)(struct fwnode_handle *fwnode, + const char *propname, + unsigned int elem_size, void *val, + size_t nval); + int (*property_read_string_array)(struct fwnode_handle *fwnode_handle, + const char *propname, + const char **val, size_t nval); + struct fwnode_handle *(*get_parent)(struct fwnode_handle *fwnode); + struct fwnode_handle * + (*get_next_child_node)(struct fwnode_handle *fwnode, + struct fwnode_handle *child); + struct fwnode_handle * + (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name); +}; + +#define fwnode_has_op(fwnode, op) \ + ((fwnode) && (fwnode)->ops && (fwnode)->ops->op) +#define fwnode_call_int_op(fwnode, op, ...) \ + (fwnode ? (fwnode_has_op(fwnode, op) ? \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ + -EINVAL) +#define fwnode_call_ptr_op(fwnode, op, ...) \ + (fwnode_has_op(fwnode, op) ? \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL) +#define fwnode_call_void_op(fwnode, op, ...) \ + do { \ + if (fwnode_has_op(fwnode, op)) \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ + } while (false) + #endif diff --git a/include/linux/of.h b/include/linux/of.h index 29b7b738b509..cdbfa88c32cf 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -100,10 +100,12 @@ struct of_reconfig_data { /* initialize a node */ extern struct kobj_type of_node_ktype; +extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { kobject_init(&node->kobj, &of_node_ktype); node->fwnode.type = FWNODE_OF; + node->fwnode.ops = &of_fwnode_ops; } /* true when node is initialized */ -- cgit v1.2.3 From 3b27d00e7b6d7c889d87fd00df600c495b968e30 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:38 +0300 Subject: device property: Move fwnode graph ops to firmware specific locations Move firmware specific implementations of the fwnode graph operations to firmware specific locations. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8f64b3ae9c57..e315d867d631 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -57,6 +57,11 @@ struct fwnode_endpoint { * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. + * @graph_get_next_endpoint: Return an endpoint node in an iteration. + * @graph_get_remote_endpoint: Return the remote endpoint node of a local + * endpoint node. + * @graph_get_port_parent: Return the parent node of a port node. + * @graph_parse_endpoint: Parse endpoint for port and endpoint id. */ struct fwnode_operations { void (*get)(struct fwnode_handle *fwnode); @@ -76,6 +81,15 @@ struct fwnode_operations { struct fwnode_handle *child); struct fwnode_handle * (*get_named_child_node)(struct fwnode_handle *fwnode, const char *name); + struct fwnode_handle * + (*graph_get_next_endpoint)(struct fwnode_handle *fwnode, + struct fwnode_handle *prev); + struct fwnode_handle * + (*graph_get_remote_endpoint)(struct fwnode_handle *fwnode); + struct fwnode_handle * + (*graph_get_port_parent)(struct fwnode_handle *fwnode); + int (*graph_parse_endpoint)(struct fwnode_handle *fwnode, + struct fwnode_endpoint *endpoint); }; #define fwnode_has_op(fwnode, op) \ -- cgit v1.2.3 From 2294b3af05e9b3fe0b84a78971e709037bd7593c Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:39 +0300 Subject: device property: Introduce fwnode_device_is_available() Add fwnode_device_is_available() to tell whether the device corresponding to a certain fwnode_handle is available for use. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 1 + include/linux/property.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index e315d867d631..9ab375419189 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -66,6 +66,7 @@ struct fwnode_endpoint { struct fwnode_operations { void (*get)(struct fwnode_handle *fwnode); void (*put)(struct fwnode_handle *fwnode); + bool (*device_is_available)(struct fwnode_handle *fwnode); bool (*property_present)(struct fwnode_handle *fwnode, const char *propname); int (*property_read_int_array)(struct fwnode_handle *fwnode, diff --git a/include/linux/property.h b/include/linux/property.h index 2f482616a2f2..7be014af78ed 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -51,6 +51,7 @@ int device_property_read_string(struct device *dev, const char *propname, int device_property_match_string(struct device *dev, const char *propname, const char *string); +bool fwnode_device_is_available(struct fwnode_handle *fwnode); bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, const char *propname, u8 *val, -- cgit v1.2.3 From 125ee6b3b0fa920c730b0991e6f083a9f5b1e4c3 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 6 Jun 2017 12:37:40 +0300 Subject: device property: Add FW type agnostic fwnode_graph_get_remote_node Add fwnode_graph_get_remote_node() function which is equivalent to of_graph_get_remote_node() on OF. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 7be014af78ed..0597a743aa66 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -281,6 +281,8 @@ struct fwnode_handle *fwnode_graph_get_remote_port( struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_endpoint( struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_node(struct fwnode_handle *fwnode, + u32 port, u32 endpoint); int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -- cgit v1.2.3 From 6a71d8d77795e0f7d887baa95bfc0d1d2bc74899 Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Tue, 6 Jun 2017 12:37:41 +0300 Subject: device property: Add fwnode_graph_get_port_parent Provide a helper to obtain the parent device fwnode without first parsing the remote-endpoint as per fwnode_graph_get_remote_port_parent. Signed-off-by: Kieran Bingham Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/property.h b/include/linux/property.h index 0597a743aa66..7e77039e6b81 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -275,6 +275,8 @@ void *device_get_mac_address(struct device *dev, char *addr, int alen); struct fwnode_handle *fwnode_graph_get_next_endpoint( struct fwnode_handle *fwnode, struct fwnode_handle *prev); +struct fwnode_handle * +fwnode_graph_get_port_parent(struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port_parent( struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_graph_get_remote_port( -- cgit v1.2.3 From 1a0915be192606fee64830b9c5d70b7ed59426b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 21 Jun 2017 08:26:46 +0200 Subject: mtd: partitions: add support for partition parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some devices have partitions that are kind of containers with extra subpartitions / volumes instead of e.g. a simple filesystem data. To support such cases we need to first create normal flash device partitions and then take care of these special ones. It's very common case for home routers. Depending on the vendor there are formats like TRX, Seama, TP-Link, WRGG & more. All of them are used to embed few partitions into a single one / single firmware file. Ideally all vendors would use some well documented / standardized format like UBI (and some probably start doing so), but there are still countless devices on the market using these poor vendor specific formats. This patch extends MTD subsystem by allowing to specify list of parsers that should be tried for a given partition. Supporting such poor formats is highly unlikely to be the top priority so these changes try to minimize maintenance cost to the minimum. It reuses existing code for these new parsers and just adds a one property and one new function. This implementation requires setting partition parsers in a flash parser. A proper change of bcm47xxpart will follow and in the future we will hopefully also find a solution for doing it with ofpart ("fixed-partitions"). Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris --- include/linux/mtd/partitions.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 06df1e06b6e0..c4beb70dacbd 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -20,6 +20,12 @@ * * For each partition, these fields are available: * name: string that will be used to label the partition's MTD device. + * types: some partitions can be containers using specific format to describe + * embedded subpartitions / volumes. E.g. many home routers use "firmware" + * partition that contains at least kernel and rootfs. In such case an + * extra parser is needed that will detect these dynamic partitions and + * report them to the MTD subsystem. If set this property stores an array + * of parser names to use when looking for subpartitions. * size: the partition size; if defined as MTDPART_SIZ_FULL, the partition * will extend to the end of the master MTD device. * offset: absolute starting position within the master MTD device; if @@ -38,6 +44,7 @@ struct mtd_partition { const char *name; /* identifier string */ + const char *const *types; /* names of parsers to use if any */ uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ uint32_t mask_flags; /* master MTD flags to mask out for this partition */ -- cgit v1.2.3 From bce70fef7279243d62adbf5f53998b8d3d016144 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lpc: Add R/W helpers to LPC protocol variants Call common functions for read / write to prepare support for future LPC protocol variants which use different I/O ops than inb / outb. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_lpc_reg.h | 47 +++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/mfd/cros_ec_lpc_reg.h (limited to 'include') diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h new file mode 100644 index 000000000000..4089bd5c8313 --- /dev/null +++ b/include/linux/mfd/cros_ec_lpc_reg.h @@ -0,0 +1,47 @@ +/* + * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. + */ + +#ifndef __LINUX_MFD_CROS_EC_REG_H +#define __LINUX_MFD_CROS_EC_REG_H + +/** + * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address. + * Returns 8-bit checksum of all bytes read. + * + * @offset: Base read address + * @length: Number of bytes to read + * @dest: Destination buffer + */ +u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); + +/** + * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address. + * Returns 8-bit checksum of all bytes written. + * + * @offset: Base write address + * @length: Number of bytes to write + * @msg: Write data buffer + */ +u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); + +#endif /* __LINUX_MFD_CROS_EC_REG_H */ -- cgit v1.2.3 From 8d4a3dc423a2695be51ac864eefb8ba7688b1240 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lpc: Add support for mec1322 EC This adds support for the ChromeOS LPC Microchip Embedded Controller (mec1322) variant. mec1322 accesses I/O region [800h, 9ffh] through embedded memory interface (EMI) rather than LPC. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_lpc_mec.h | 90 +++++++++++++++++++++++++++++++++++++ include/linux/mfd/cros_ec_lpc_reg.h | 14 ++++++ 2 files changed, 104 insertions(+) create mode 100644 include/linux/mfd/cros_ec_lpc_mec.h (limited to 'include') diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h new file mode 100644 index 000000000000..176496ddc66c --- /dev/null +++ b/include/linux/mfd/cros_ec_lpc_mec.h @@ -0,0 +1,90 @@ +/* + * cros_ec_lpc_mec - LPC variant I/O for Microchip EC + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. + */ + +#ifndef __LINUX_MFD_CROS_EC_MEC_H +#define __LINUX_MFD_CROS_EC_MEC_H + +#include + +enum cros_ec_lpc_mec_emi_access_mode { + /* 8-bit access */ + ACCESS_TYPE_BYTE = 0x0, + /* 16-bit access */ + ACCESS_TYPE_WORD = 0x1, + /* 32-bit access */ + ACCESS_TYPE_LONG = 0x2, + /* + * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the + * EC data register to be incremented. + */ + ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3, +}; + +enum cros_ec_lpc_mec_io_type { + MEC_IO_READ, + MEC_IO_WRITE, +}; + +/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */ +#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0 +#define MEC_EMI_RANGE_END (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE) + +/* EMI registers are relative to base */ +#define MEC_EMI_BASE 0x800 +#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0) +#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1) +#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2) +#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3) +#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4) +#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5) +#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6) +#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7) + +/* + * cros_ec_lpc_mec_init + * + * Initialize MEC I/O. + */ +void cros_ec_lpc_mec_init(void); + +/* + * cros_ec_lpc_mec_destroy + * + * Cleanup MEC I/O. + */ +void cros_ec_lpc_mec_destroy(void); + +/** + * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port + * + * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request + * @offset: Base read / write address + * @length: Number of bytes to read / write + * @buf: Destination / source buffer + * + * @return 8-bit checksum of all bytes read / written + */ +u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type, + unsigned int offset, unsigned int length, u8 *buf); + +#endif /* __LINUX_MFD_CROS_EC_MEC_H */ diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h index 4089bd5c8313..5560bef63c2b 100644 --- a/include/linux/mfd/cros_ec_lpc_reg.h +++ b/include/linux/mfd/cros_ec_lpc_reg.h @@ -44,4 +44,18 @@ u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); */ u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); +/** + * cros_ec_lpc_reg_init + * + * Initialize register I/O. + */ +void cros_ec_lpc_reg_init(void); + +/** + * cros_ec_lpc_reg_destroy + * + * Cleanup reg I/O. + */ +void cros_ec_lpc_reg_destroy(void); + #endif /* __LINUX_MFD_CROS_EC_REG_H */ -- cgit v1.2.3 From be3ebebf4377fe924f0419f78fc82cf01a31e692 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lightbar - Add lightbar program feature to sysfs Add a program feature so we can upload and run programs for lightbar sequences. We should be able to use this to shift sequences out of the EC and save space there. $ cat > /sys/devices/.../cros_ec/program $ echo program > /sys/devices/.../cros_ec/sequence Signed-off-by: Eric Caruso Signed-off-by: Guenter Roeck Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 1b19e424e1cf..dbea5802e83b 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1162,6 +1162,13 @@ struct lightbar_params_v1 { struct rgb_s color[8]; /* 0-3 are Google colors */ } __packed; +/* Lightbar program */ +#define EC_LB_PROG_LEN 192 +struct lightbar_program { + uint8_t size; + uint8_t data[EC_LB_PROG_LEN]; +}; + struct ec_params_lightbar { uint8_t cmd; /* Command (see enum lightbar_command) */ union { @@ -1188,6 +1195,7 @@ struct ec_params_lightbar { struct lightbar_params_v0 set_params_v0; struct lightbar_params_v1 set_params_v1; + struct lightbar_program set_program; }; } __packed; @@ -1220,7 +1228,8 @@ struct ec_response_lightbar { struct { /* no return params */ } off, on, init, set_brightness, seq, reg, set_rgb, - demo, set_params_v0, set_params_v1; + demo, set_params_v0, set_params_v1, + set_program; }; } __packed; @@ -1244,6 +1253,7 @@ enum lightbar_command { LIGHTBAR_CMD_GET_DEMO = 15, LIGHTBAR_CMD_GET_PARAMS_V1 = 16, LIGHTBAR_CMD_SET_PARAMS_V1 = 17, + LIGHTBAR_CMD_SET_PROGRAM = 18, LIGHTBAR_NUM_CMDS }; -- cgit v1.2.3 From 405c84308c4335ee7cb58b9304b77b85e61f7129 Mon Sep 17 00:00:00 2001 From: Eric Caruso Date: Tue, 16 May 2017 17:46:48 +0200 Subject: platform/chrome: cros_ec_lightbar - Control of suspend/resume lightbar sequence Don't let EC control suspend/resume sequence. If the EC controls the lightbar and sets the sequence when it notices the chipset transitioning between states, we can't make exceptions for cases where we don't want to activate the lightbar. Instead, let's move the suspend/resume notifications into the kernel so we can selectively play the sequences. Signed-off-by: Eric Caruso Signed-off-by: Guenter Roeck Signed-off-by: Enric Balletbo i Serra Acked-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec_commands.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index dbea5802e83b..190c8f4afa02 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1175,7 +1175,7 @@ struct ec_params_lightbar { struct { /* no args */ } dump, off, on, init, get_seq, get_params_v0, get_params_v1, - version, get_brightness, get_demo; + version, get_brightness, get_demo, suspend, resume; struct { uint8_t num; @@ -1193,6 +1193,10 @@ struct ec_params_lightbar { uint8_t led; } get_rgb; + struct { + uint8_t enable; + } manual_suspend_ctrl; + struct lightbar_params_v0 set_params_v0; struct lightbar_params_v1 set_params_v1; struct lightbar_program set_program; @@ -1229,7 +1233,7 @@ struct ec_response_lightbar { /* no return params */ } off, on, init, set_brightness, seq, reg, set_rgb, demo, set_params_v0, set_params_v1, - set_program; + set_program, manual_suspend_ctrl, suspend, resume; }; } __packed; @@ -1254,6 +1258,9 @@ enum lightbar_command { LIGHTBAR_CMD_GET_PARAMS_V1 = 16, LIGHTBAR_CMD_SET_PARAMS_V1 = 17, LIGHTBAR_CMD_SET_PROGRAM = 18, + LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19, + LIGHTBAR_CMD_SUSPEND = 20, + LIGHTBAR_CMD_RESUME = 21, LIGHTBAR_NUM_CMDS }; -- cgit v1.2.3 From 38cb266ad1a24e037220dc563ab46364595b17b6 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 23 Jun 2017 14:00:13 -0600 Subject: DRM: Fix an incorrectly formatted table The "supported input formats" table in dw_hdmi.h was incorrectly formatted, using "+" signs where "|" needs to be. That, in turn, causes the PDF build to fail. Fixes: def23aa7e982 ("drm: bridge: dw-hdmi: Switch to V4L bus format and encodings") Signed-off-by: Jonathan Corbet Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170623140013.0703107a@lwn.net --- include/drm/bridge/dw_hdmi.h | 70 ++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h index 4c8d4c81a0e8..182f83283e24 100644 --- a/include/drm/bridge/dw_hdmi.h +++ b/include/drm/bridge/dw_hdmi.h @@ -22,56 +22,56 @@ struct dw_hdmi; * 48bit bus. * * +----------------------+----------------------------------+------------------------------+ - * + Format Name + Format Code + Encodings + + * | Format Name | Format Code | Encodings | * +----------------------+----------------------------------+------------------------------+ - * + RGB 4:4:4 8bit + ``MEDIA_BUS_FMT_RGB888_1X24`` + ``V4L2_YCBCR_ENC_DEFAULT`` + + * | RGB 4:4:4 8bit | ``MEDIA_BUS_FMT_RGB888_1X24`` | ``V4L2_YCBCR_ENC_DEFAULT`` | * +----------------------+----------------------------------+------------------------------+ - * + RGB 4:4:4 10bits + ``MEDIA_BUS_FMT_RGB101010_1X30`` + ``V4L2_YCBCR_ENC_DEFAULT`` + + * | RGB 4:4:4 10bits | ``MEDIA_BUS_FMT_RGB101010_1X30`` | ``V4L2_YCBCR_ENC_DEFAULT`` | * +----------------------+----------------------------------+------------------------------+ - * + RGB 4:4:4 12bits + ``MEDIA_BUS_FMT_RGB121212_1X36`` + ``V4L2_YCBCR_ENC_DEFAULT`` + + * | RGB 4:4:4 12bits | ``MEDIA_BUS_FMT_RGB121212_1X36`` | ``V4L2_YCBCR_ENC_DEFAULT`` | * +----------------------+----------------------------------+------------------------------+ - * + RGB 4:4:4 16bits + ``MEDIA_BUS_FMT_RGB161616_1X48`` + ``V4L2_YCBCR_ENC_DEFAULT`` + + * | RGB 4:4:4 16bits | ``MEDIA_BUS_FMT_RGB161616_1X48`` | ``V4L2_YCBCR_ENC_DEFAULT`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:4:4 8bit + ``MEDIA_BUS_FMT_YUV8_1X24`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + - * + + + or ``V4L2_YCBCR_ENC_XV601`` + - * + + + or ``V4L2_YCBCR_ENC_XV709`` + + * | YCbCr 4:4:4 8bit | ``MEDIA_BUS_FMT_YUV8_1X24`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | + * | | | or ``V4L2_YCBCR_ENC_XV601`` | + * | | | or ``V4L2_YCBCR_ENC_XV709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:4:4 10bits + ``MEDIA_BUS_FMT_YUV10_1X30`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + - * + + + or ``V4L2_YCBCR_ENC_XV601`` + - * + + + or ``V4L2_YCBCR_ENC_XV709`` + + * | YCbCr 4:4:4 10bits | ``MEDIA_BUS_FMT_YUV10_1X30`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | + * | | | or ``V4L2_YCBCR_ENC_XV601`` | + * | | | or ``V4L2_YCBCR_ENC_XV709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:4:4 12bits + ``MEDIA_BUS_FMT_YUV12_1X36`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + - * + + + or ``V4L2_YCBCR_ENC_XV601`` + - * + + + or ``V4L2_YCBCR_ENC_XV709`` + + * | YCbCr 4:4:4 12bits | ``MEDIA_BUS_FMT_YUV12_1X36`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | + * | | | or ``V4L2_YCBCR_ENC_XV601`` | + * | | | or ``V4L2_YCBCR_ENC_XV709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:4:4 16bits + ``MEDIA_BUS_FMT_YUV16_1X48`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + - * + + + or ``V4L2_YCBCR_ENC_XV601`` + - * + + + or ``V4L2_YCBCR_ENC_XV709`` + + * | YCbCr 4:4:4 16bits | ``MEDIA_BUS_FMT_YUV16_1X48`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | + * | | | or ``V4L2_YCBCR_ENC_XV601`` | + * | | | or ``V4L2_YCBCR_ENC_XV709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:2 8bit + ``MEDIA_BUS_FMT_UYVY8_1X16`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:2 8bit | ``MEDIA_BUS_FMT_UYVY8_1X16`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:2 10bits + ``MEDIA_BUS_FMT_UYVY10_1X20`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:2 10bits | ``MEDIA_BUS_FMT_UYVY10_1X20`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:2 12bits + ``MEDIA_BUS_FMT_UYVY12_1X24`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:2 12bits | ``MEDIA_BUS_FMT_UYVY12_1X24`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:0 8bit + ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` + ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:0 8bit | ``MEDIA_BUS_FMT_UYYVYY8_0_5X24`` | ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:0 10bits + ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``+ ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:0 10bits | ``MEDIA_BUS_FMT_UYYVYY10_0_5X30``| ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:0 12bits + ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``+ ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:0 12bits | ``MEDIA_BUS_FMT_UYYVYY12_0_5X36``| ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ - * + YCbCr 4:2:0 16bits + ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``+ ``V4L2_YCBCR_ENC_601`` + - * + + + or ``V4L2_YCBCR_ENC_709`` + + * | YCbCr 4:2:0 16bits | ``MEDIA_BUS_FMT_UYYVYY16_0_5X48``| ``V4L2_YCBCR_ENC_601`` | + * | | | or ``V4L2_YCBCR_ENC_709`` | * +----------------------+----------------------------------+------------------------------+ */ -- cgit v1.2.3 From 29d99b966d60029a11d08b9b004cd84b21ce0d67 Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Tue, 14 Feb 2017 20:58:02 +0100 Subject: cros_ec: Don't signal wake event for non-wake host events The subset of wake-enabled host events is defined by the EC, but the EC may still send non-wake host events if we're in the process of suspending. Get the mask of wake-enabled host events from the EC and filter out non-wake events to prevent spurious aborted suspend attempts. Signed-off-by: Shawn Nematbakhsh Signed-off-by: Thierry Escande Acked-for-MFD-by: Lee Jones Signed-off-by: Benson Leung --- include/linux/mfd/cros_ec.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 3b16c9009749..4e887ba22635 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -149,6 +149,7 @@ struct cros_ec_device { struct ec_response_get_next_event event_data; int event_size; + u32 host_event_wake_mask; }; /** @@ -299,10 +300,12 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); * cros_ec_get_next_event - Fetch next event from the ChromeOS EC * * @ec_dev: Device to fetch event from + * @wake_event: Pointer to a bool set to true upon return if the event might be + * treated as a wake event. Ignored if null. * * Returns: 0 on success, Linux error number on failure */ -int cros_ec_get_next_event(struct cros_ec_device *ec_dev); +int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); /** * cros_ec_get_host_event - Return a mask of event set by the EC. -- cgit v1.2.3 From a80a32341fbabd4276165a9ce4fa4c80168c0bef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:17:35 -0400 Subject: svcrdma: Remove svc_rdma_marshal.c svc_rdma_marshal.c has one remaining exported function -- svc_rdma_xdr_decode_req -- and it has a single call site. Take the same approach as the sendto path, and move this function into the source file where it is called. This is a refactoring change only. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f3787d800ba4..3ca991657889 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -185,9 +185,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, struct xdr_buf *rcvbuf); -/* svc_rdma_marshal.c */ -extern int svc_rdma_xdr_decode_req(struct xdr_buf *); - /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, -- cgit v1.2.3 From 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 Jun 2017 13:50:05 -0600 Subject: vfio: New external user group/file match At the point where the kvm-vfio pseudo device wants to release its vfio group reference, we can't always acquire a new reference to make that happen. The group can be in a state where we wouldn't allow a new reference to be added. This new helper function allows a caller to match a file to a group to facilitate this. Given a file and group, report if they match. Thus the caller needs to already have a group reference to match to the file. This allows the deletion of a group without acquiring a new reference. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Reviewed-by: Paolo Bonzini Tested-by: Eric Auger Cc: stable@vger.kernel.org --- include/linux/vfio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index edf9b2cad277..9b34d0af5d27 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); -- cgit v1.2.3 From 334fd34d76f237c0ee58dfc400d2c4e34d660544 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Jun 2017 11:43:20 -0700 Subject: vfs: Add page_cache_seek_hole_data helper Both ext4 and xfs implement seeking for the next hole or piece of data in unwritten extents by scanning the page cache, and both versions share the same bug when iterating the buffers of a page: the start offset into the page isn't taken into account, so when a page fits more than two filesystem blocks, things will go wrong. For example, on a filesystem with a block size of 1k, the following command will fail: xfs_io -f -c "falloc 0 4k" \ -c "pwrite 1k 1k" \ -c "pwrite 3k 1k" \ -c "seek -a -r 0" foo In this example, neither lseek(fd, 1024, SEEK_HOLE) nor lseek(fd, 2048, SEEK_DATA) will return the correct result. Introduce a generic vfs helper for seeking in the page cache that gets this right. The next commits will replace the filesystem specific implementations. Signed-off-by: Andreas Gruenbacher [hch: dropped the export] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/buffer_head.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bd029e52ef5e..ad4e024ce17e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -201,6 +201,8 @@ void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); int bh_submit_read(struct buffer_head *bh); +loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset, + loff_t length, int whence); extern int buffer_heads_over_limit; -- cgit v1.2.3 From 0ed3b0d45fd39142e418220f518c8959c1a5f596 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 29 Jun 2017 11:43:21 -0700 Subject: vfs: Add iomap_seek_hole and iomap_seek_data helpers Filesystems can use this for implementing lseek SEEK_HOLE / SEEK_DATA support via iomap. Signed-off-by: Andreas Gruenbacher [hch: split functions, coding style cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f753e788da31..8a03f5dcd89b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -83,6 +83,10 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, const struct iomap_ops *ops); +loff_t iomap_seek_hole(struct inode *inode, loff_t offset, + const struct iomap_ops *ops); +loff_t iomap_seek_data(struct inode *inode, loff_t offset, + const struct iomap_ops *ops); /* * Flags for direct I/O ->end_io: -- cgit v1.2.3 From 468138d78510688fb5476f98d23f11ac6a63229a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 May 2017 19:52:17 -0400 Subject: binfmt_flat: flat_{get,put}_addr_from_rp() should be able to fail on MMU targets EFAULT is possible here. Make both return 0 or error, passing what used to be the return value of flat_get_addr_from_rp() by reference. Signed-off-by: Al Viro --- include/linux/flat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/flat.h b/include/linux/flat.h index 2c1eb15c4ba4..7d542dfd0def 100644 --- a/include/linux/flat.h +++ b/include/linux/flat.h @@ -9,8 +9,8 @@ #ifndef _LINUX_FLAT_H #define _LINUX_FLAT_H -#include #include +#include /* * While it would be nice to keep this header clean, users of older -- cgit v1.2.3 From 3170d8d226c2053355f3946b4b5ded4c006fe6d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 2 May 2017 20:06:33 -0400 Subject: kill {__,}{get,put}_user_unaligned() no users left Signed-off-by: Al Viro --- include/asm-generic/uaccess-unaligned.h | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 include/asm-generic/uaccess-unaligned.h (limited to 'include') diff --git a/include/asm-generic/uaccess-unaligned.h b/include/asm-generic/uaccess-unaligned.h deleted file mode 100644 index 67deb898f0c5..000000000000 --- a/include/asm-generic/uaccess-unaligned.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __ASM_GENERIC_UACCESS_UNALIGNED_H -#define __ASM_GENERIC_UACCESS_UNALIGNED_H - -/* - * This macro should be used instead of __get_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __get_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x; \ - __copy_from_user(&__x, (ptr), sizeof(*(ptr))) ? -EFAULT : 0; \ - (x) = __x; \ -}) - - -/* - * This macro should be used instead of __put_user() when accessing - * values at locations that are not known to be aligned. - */ -#define __put_user_unaligned(x, ptr) \ -({ \ - __typeof__ (*(ptr)) __x = (x); \ - __copy_to_user((ptr), &__x, sizeof(*(ptr))) ? -EFAULT : 0; \ -}) - -#endif /* __ASM_GENERIC_UACCESS_UNALIGNED_H */ -- cgit v1.2.3 From fbd08e7673f950854679e5d79a30bb25e77a9d08 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:10 -0700 Subject: bio-integrity: fix interface for bio_integrity_trim bio_integrity_trim inherent it's interface from bio_trim and accept offset and size, but this API is error prone because data offset must always be insync with bio's data offset. That is why we have integrity update hook in bio_advance() So only meaningful values are: offset == 0, sectors == bio_sectors(bio) Let's just remove them completely. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Dmitry Monakhov Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 664a27da276d..1d74f5120369 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -728,7 +728,7 @@ extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); -extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); extern int bioset_integrity_create(struct bio_set *, int); extern void bioset_integrity_free(struct bio_set *); @@ -778,8 +778,7 @@ static inline void bio_integrity_advance(struct bio *bio, return; } -static inline void bio_integrity_trim(struct bio *bio, unsigned int offset, - unsigned int sectors) +static inline void bio_integrity_trim(struct bio *bio) { return; } -- cgit v1.2.3 From e23947bd76f00701f9407af23e671f4da96f5f25 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:11 -0700 Subject: bio-integrity: fold bio_integrity_enabled to bio_integrity_prep Currently all integrity prep hooks are open-coded, and if prepare fails we ignore it's code and fail bio with EIO. Let's return real error to upper layer, so later caller may react accordingly. In fact no one want to use bio_integrity_prep() w/o bio_integrity_enabled, so it is reasonable to fold it in to one function. Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K. Petersen [hch: merged with the latest block tree, return bool from bio_integrity_prep] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1d74f5120369..b3b5f5a89a9c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -724,8 +724,7 @@ struct biovec_slab { extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern bool bio_integrity_enabled(struct bio *bio); -extern int bio_integrity_prep(struct bio *); +extern bool bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); @@ -741,11 +740,6 @@ static inline void *bio_integrity(struct bio *bio) return NULL; } -static inline bool bio_integrity_enabled(struct bio *bio) -{ - return false; -} - static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) { return 0; @@ -756,9 +750,9 @@ static inline void bioset_integrity_free (struct bio_set *bs) return; } -static inline int bio_integrity_prep(struct bio *bio) +static inline bool bio_integrity_prep(struct bio *bio) { - return 0; + return true; } static inline void bio_integrity_free(struct bio *bio) -- cgit v1.2.3 From 128b6f9fdd9ace9e56cb3a263b4bc269658f9c40 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:12 -0700 Subject: t10-pi: Move opencoded contants to common header Signed-off-by: Dmitry Monakhov Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 9375d23a24e7..635a3c5706bd 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -33,6 +33,8 @@ struct t10_pi_tuple { __be32 ref_tag; /* Target LBA or indirect LBA */ }; +#define T10_PI_APP_ESCAPE cpu_to_be16(0xffff) +#define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff) extern const struct blk_integrity_profile t10_pi_type1_crc; extern const struct blk_integrity_profile t10_pi_type1_ip; -- cgit v1.2.3 From b1fb2c52b2d85f51f36f1661409f9aeef94265ff Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:13 -0700 Subject: block: guard bvec iteration logic Currently if some one try to advance bvec beyond it's size we simply dump WARN_ONCE and continue to iterate beyond bvec array boundaries. This simply means that we endup dereferencing/corrupting random memory region. Sane reaction would be to propagate error back to calling context But bvec_iter_advance's calling context is not always good for error handling. For safity reason let truncate iterator size to zero which will break external iteration loop which prevent us from unpredictable memory range corruption. And even it caller ignores an error, it will corrupt it's own bvecs, not others. This patch does: - Return error back to caller with hope that it will react on this - Truncate iterator size Code was added long time ago here 4550dd6c, luckily no one hit it in real life :) Signed-off-by: Dmitry Monakhov Reviewed-by: Ming Lei Reviewed-by: Martin K. Petersen [hch: switch to true/false returns instead of errno values] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 +++- include/linux/bvec.h | 14 +++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index b3b5f5a89a9c..d5e8689f86b8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -167,8 +167,10 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; - else + else { bvec_iter_advance(bio->bi_io_vec, iter, bytes); + /* TODO: It is reasonable to complete bio with error here. */ + } } #define __bio_for_each_segment(bvl, bio, iter, start) \ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 89b65b82d98f..de317b4c13c1 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -22,6 +22,7 @@ #include #include +#include /* * was unsigned short, but we might as well be ready for > 64kB I/O pages @@ -66,12 +67,14 @@ struct bvec_iter { .bv_offset = bvec_iter_offset((bvec), (iter)), \ }) -static inline void bvec_iter_advance(const struct bio_vec *bv, - struct bvec_iter *iter, - unsigned bytes) +static inline bool bvec_iter_advance(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned bytes) { - WARN_ONCE(bytes > iter->bi_size, - "Attempted to advance past end of bvec iter\n"); + if (WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n")) { + iter->bi_size = 0; + return false; + } while (bytes) { unsigned iter_len = bvec_iter_len(bv, *iter); @@ -86,6 +89,7 @@ static inline void bvec_iter_advance(const struct bio_vec *bv, iter->bi_idx++; } } + return true; } #define for_each_bvec(bvl, bio_vec, iter, start) \ -- cgit v1.2.3 From f9df1cd99ebd82f05e8f5e0aa7e38cb8d3c791d7 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 29 Jun 2017 11:31:14 -0700 Subject: bio: add bvec_iter rewind API Some ->bi_end_io handlers (for example: pi_verify or decrypt handlers) need to know original data vector, but after bio traverse io-stack it may be advanced, splited and relocated many times so it is hard to guess original iterator. Let's add 'bi_done' conter which accounts number of bytes iterator was advanced during it's evolution. Later end_io handler may easily restore original iterator by rewinding iterator to iter->bi_done. Note: this change makes sizeof (struct bvec_iter) multiple to 8 Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Dmitry Monakhov [hch: switched to true/false return] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 19 +++++++++++++++++-- include/linux/bvec.h | 27 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index d5e8689f86b8..1eba19580185 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -165,14 +165,29 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, { iter->bi_sector += bytes >> 9; - if (bio_no_advance_iter(bio)) + if (bio_no_advance_iter(bio)) { iter->bi_size -= bytes; - else { + iter->bi_done += bytes; + } else { bvec_iter_advance(bio->bi_io_vec, iter, bytes); /* TODO: It is reasonable to complete bio with error here. */ } } +static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector -= bytes >> 9; + + if (bio_no_advance_iter(bio)) { + iter->bi_size += bytes; + iter->bi_done -= bytes; + return true; + } + + return bvec_iter_rewind(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ diff --git a/include/linux/bvec.h b/include/linux/bvec.h index de317b4c13c1..ec8a4d7af6bd 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -40,6 +40,8 @@ struct bvec_iter { unsigned int bi_idx; /* current index into bvl_vec */ + unsigned int bi_done; /* number of bytes completed */ + unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ }; @@ -83,6 +85,7 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, bytes -= len; iter->bi_size -= len; iter->bi_bvec_done += len; + iter->bi_done += len; if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { iter->bi_bvec_done = 0; @@ -92,6 +95,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } +static inline bool bvec_iter_rewind(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned int bytes) +{ + while (bytes) { + unsigned len = min(bytes, iter->bi_bvec_done); + + if (iter->bi_bvec_done == 0) { + if (WARN_ONCE(iter->bi_idx == 0, + "Attempted to rewind iter beyond " + "bvec's boundaries\n")) { + return false; + } + iter->bi_idx--; + iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len; + continue; + } + bytes -= len; + iter->bi_size += len; + iter->bi_bvec_done -= len; + } + return true; +} + #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ -- cgit v1.2.3 From 7c20f11680a441df09de7235206f70115fbf6290 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Jul 2017 16:58:43 -0600 Subject: bio-integrity: stop abusing bi_end_io And instead call directly into the integrity code from bio_end_io. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1eba19580185..7b1cf4ba0902 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -320,8 +320,6 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ - unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ @@ -739,10 +737,8 @@ struct biovec_slab { bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); -extern void bio_integrity_endio(struct bio *); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); @@ -772,11 +768,6 @@ static inline bool bio_integrity_prep(struct bio *bio) return true; } -static inline void bio_integrity_free(struct bio *bio) -{ - return; -} - static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) { -- cgit v1.2.3 From 2bece49394872d36bbc5767fd643deac05920c55 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Mon, 3 Jul 2017 14:33:08 -0700 Subject: ACPI: SPCR: Use access width to determine mmio usage The current SPCR code does not check the access width of the MMIO, and uses a default of 8bit register accesses. This prevents devices that only do 16 or 32bit register accesses from working. By simply checking this field and setting the MMIO string appropriately, this issue can be corrected. To prevent any legacy issues, the code will default to 8bit accesses if the value is anything but 16 or 32. Signed-off-by: Jon Mason Signed-off-by: Loc Ho Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- include/acpi/acrestyp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h index f0f7403d2000..781cb555c960 100644 --- a/include/acpi/acrestyp.h +++ b/include/acpi/acrestyp.h @@ -372,6 +372,13 @@ struct acpi_resource_generic_register { u64 address; }; +/* Generic Address Space Access Sizes */ +#define ACPI_ACCESS_SIZE_UNDEFINED 0 +#define ACPI_ACCESS_SIZE_BYTE 1 +#define ACPI_ACCESS_SIZE_WORD 2 +#define ACPI_ACCESS_SIZE_DWORD 3 +#define ACPI_ACCESS_SIZE_QWORD 4 + struct acpi_resource_gpio { u8 revision_id; u8 connection_type; -- cgit v1.2.3 From ad0af7104dadccd55cd2b390271677fac142650f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 21 Jun 2017 15:28:32 +0300 Subject: vfs: introduce inode 'inuse' lock Added an i_state flag I_INUSE and helpers to set/clear/test the bit. The 'inuse' lock is an 'advisory' inode lock, that can be used to extend exclusive create protection beyond parent->i_mutex lock among cooperating users. This is going to be used by overlayfs to get exclusive ownership on upper and work dirs among overlayfs mounts. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3e68cabb8457..75a5fafaf096 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1930,6 +1930,9 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) * wb stat updates to grab mapping->tree_lock. See * inode_switch_wb_work_fn() for details. * + * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper + * and work dirs among overlayfs mounts. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -1950,6 +1953,7 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode) #define __I_DIRTY_TIME_EXPIRED 12 #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_WB_SWITCH (1 << 13) +#define I_OVL_INUSE (1 << 14) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) -- cgit v1.2.3 From f35157417215ec138c920320c746fdb3e04ef1d5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:02 +0100 Subject: Provide a function to create a NUL-terminated string from unterminated data Provide a function, kmemdup_nul(), that will create a NUL-terminated string from an unterminated character array where the length is known in advance. This is better than kstrndup() in situations where we already know the string length as the strnlen() in kstrndup() is superfluous. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..3dd944cfe171 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -131,6 +131,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); -- cgit v1.2.3 From ee416bcdba9975065de571e09de1f7ebfde2156a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:16 +0100 Subject: VFS: Make get_filesystem() return the affected filesystem Make get_filesystem() return a pointer to the filesystem on which it just got a ref. Suggested-by: Rasmus Villemoes Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..bc0c054894b9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2956,7 +2956,7 @@ extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, get_block_t *get_block); -extern void get_filesystem(struct file_system_type *fs); +extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -- cgit v1.2.3 From cdf01226b26e98c79c13b335fbe0cbbbe850cf44 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Jul 2017 17:25:22 +0100 Subject: VFS: Provide empty name qstr Provide an empty name (ie. "") qstr for general use. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..3f65a4fa72ed 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -55,6 +55,11 @@ struct qstr { #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +extern const char empty_string[]; +extern const struct qstr empty_name; +extern const char slash_string[]; +extern const struct qstr slash_name; + struct dentry_stat_t { long nr_dentry; long nr_unused; -- cgit v1.2.3 From 4a25220d4e43bb2461823dbc7eb1502a34087958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jul 2017 16:24:18 +0100 Subject: hugetlbfs: Implement show_options Implement the show_options superblock op for hugetlbfs as part of a bid to get rid of s_options and generic_show_options() to make it easier to implement a context-based mount where the mount options can be passed individually over a file descriptor. Note that the uid and gid should possibly be displayed relative to the viewer's user namespace. Signed-off-by: David Howells cc: Nadia Yvette Chambers Signed-off-by: Al Viro --- include/linux/hugetlb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b857fc8cc2ec..3b6eeaad2f77 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -262,6 +262,9 @@ struct hugetlbfs_sb_info { spinlock_t stat_lock; struct hstate *hstate; struct hugepage_subpool *spool; + kuid_t uid; + kgid_t gid; + umode_t mode; }; static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) -- cgit v1.2.3 From 60934b200ddd62187b149a7f32cc0f160c08a7ed Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:13 +0300 Subject: NTB: Make link-state API being declared first Since link operations are usually performed before memory window access operations, it's logically better to declare link-related API before any of MW/Doorbell/Scratchpad methods. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 137 ++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index de87ceac110e..d7ab3e1ec88f 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -179,13 +179,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) /** * struct ntb_ctx_ops - ntb device operations + * @link_is_up: See ntb_link_is_up(). + * @link_enable: See ntb_link_enable(). + * @link_disable: See ntb_link_disable(). * @mw_count: See ntb_mw_count(). * @mw_get_range: See ntb_mw_get_range(). * @mw_set_trans: See ntb_mw_set_trans(). * @mw_clear_trans: See ntb_mw_clear_trans(). - * @link_is_up: See ntb_link_is_up(). - * @link_enable: See ntb_link_enable(). - * @link_disable: See ntb_link_disable(). * @db_is_unsafe: See ntb_db_is_unsafe(). * @db_valid_mask: See ntb_db_valid_mask(). * @db_vector_count: See ntb_db_vector_count(). @@ -212,6 +212,12 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_write: See ntb_peer_spad_write(). */ struct ntb_dev_ops { + int (*link_is_up)(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width); + int (*link_enable)(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width); + int (*link_disable)(struct ntb_dev *ntb); + int (*mw_count)(struct ntb_dev *ntb); int (*mw_get_range)(struct ntb_dev *ntb, int idx, phys_addr_t *base, resource_size_t *size, @@ -220,12 +226,6 @@ struct ntb_dev_ops { dma_addr_t addr, resource_size_t size); int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); - int (*link_is_up)(struct ntb_dev *ntb, - enum ntb_speed *speed, enum ntb_width *width); - int (*link_enable)(struct ntb_dev *ntb, - enum ntb_speed max_speed, enum ntb_width max_width); - int (*link_disable)(struct ntb_dev *ntb); - int (*db_is_unsafe)(struct ntb_dev *ntb); u64 (*db_valid_mask)(struct ntb_dev *ntb); int (*db_vector_count)(struct ntb_dev *ntb); @@ -265,13 +265,14 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + ops->link_is_up && + ops->link_enable && + ops->link_disable && ops->mw_count && ops->mw_get_range && ops->mw_set_trans && /* ops->mw_clear_trans && */ - ops->link_is_up && - ops->link_enable && - ops->link_disable && + /* ops->db_is_unsafe && */ ops->db_valid_mask && @@ -440,6 +441,62 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_link_is_up() - get the current ntb link state + * @ntb: NTB device context. + * @speed: OUT - The link speed expressed as PCIe generation number. + * @width: OUT - The link width expressed as the number of PCIe lanes. + * + * Get the current state of the ntb link. It is recommended to query the link + * state once after every link event. It is safe to query the link state in + * the context of the link event callback. + * + * Return: One if the link is up, zero if the link is down, otherwise a + * negative value indicating the error number. + */ +static inline int ntb_link_is_up(struct ntb_dev *ntb, + enum ntb_speed *speed, enum ntb_width *width) +{ + return ntb->ops->link_is_up(ntb, speed, width); +} + +/** + * ntb_link_enable() - enable the link on the secondary side of the ntb + * @ntb: NTB device context. + * @max_speed: The maximum link speed expressed as PCIe generation number. + * @max_width: The maximum link width expressed as the number of PCIe lanes. + * + * Enable the link on the secondary side of the ntb. This can only be done + * from the primary side of the ntb in primary or b2b topology. The ntb device + * should train the link to its maximum speed and width, or the requested speed + * and width, whichever is smaller, if supported. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, + enum ntb_width max_width) +{ + return ntb->ops->link_enable(ntb, max_speed, max_width); +} + +/** + * ntb_link_disable() - disable the link on the secondary side of the ntb + * @ntb: NTB device context. + * + * Disable the link on the secondary side of the ntb. This can only be + * done from the primary side of the ntb in primary or b2b topology. The ntb + * device should disable the link. Returning from this call must indicate that + * a barrier has passed, though with no more writes may pass in either + * direction across the link, except if this call returns an error number. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_link_disable(struct ntb_dev *ntb) +{ + return ntb->ops->link_disable(ntb); +} + /** * ntb_mw_count() - get the number of memory windows * @ntb: NTB device context. @@ -516,62 +573,6 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) return ntb->ops->mw_clear_trans(ntb, idx); } -/** - * ntb_link_is_up() - get the current ntb link state - * @ntb: NTB device context. - * @speed: OUT - The link speed expressed as PCIe generation number. - * @width: OUT - The link width expressed as the number of PCIe lanes. - * - * Get the current state of the ntb link. It is recommended to query the link - * state once after every link event. It is safe to query the link state in - * the context of the link event callback. - * - * Return: One if the link is up, zero if the link is down, otherwise a - * negative value indicating the error number. - */ -static inline int ntb_link_is_up(struct ntb_dev *ntb, - enum ntb_speed *speed, enum ntb_width *width) -{ - return ntb->ops->link_is_up(ntb, speed, width); -} - -/** - * ntb_link_enable() - enable the link on the secondary side of the ntb - * @ntb: NTB device context. - * @max_speed: The maximum link speed expressed as PCIe generation number. - * @max_width: The maximum link width expressed as the number of PCIe lanes. - * - * Enable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should train the link to its maximum speed and width, or the requested speed - * and width, whichever is smaller, if supported. - * - * Return: Zero on success, otherwise an error number. - */ -static inline int ntb_link_enable(struct ntb_dev *ntb, - enum ntb_speed max_speed, - enum ntb_width max_width) -{ - return ntb->ops->link_enable(ntb, max_speed, max_width); -} - -/** - * ntb_link_disable() - disable the link on the secondary side of the ntb - * @ntb: NTB device context. - * - * Disable the link on the secondary side of the ntb. This can only be - * done from the primary side of the ntb in primary or b2b topology. The ntb - * device should disable the link. Returning from this call must indicate that - * a barrier has passed, though with no more writes may pass in either - * direction across the link, except if this call returns an error number. - * - * Return: Zero on success, otherwise an error number. - */ -static inline int ntb_link_disable(struct ntb_dev *ntb) -{ - return ntb->ops->link_disable(ntb); -} - /** * ntb_db_is_unsafe() - check if it is safe to use hardware doorbell * @ntb: NTB device context. -- cgit v1.2.3 From 1e5301196a88961b02fe43c73a952f78b2c84712 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:14 +0300 Subject: NTB: Add indexed ports NTB API There is some NTB hardware, which can combine more than just two domains over NTB. For instance, some IDT PCIe-switches can have NTB-functions activated on more than two-ports. The different domains are distinguished by ports they are connected to. So the new port-related methods are added to the NTB API: ntb_port_number() - return local port ntb_peer_port_count() - return number of peers local port can connect to ntb_peer_port_number(pdix) - return port number by it index ntb_peer_port_idx(port) - return port index by it number Current test-drivers aren't changed much. They still support two-ports devices for the time being while multi-ports hardware drivers aren't added. By default port-related API is declared for two-ports hardware. So corresponding hardware drivers won't need to implement it. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- include/linux/ntb.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d7ab3e1ec88f..d23483bae6f3 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -139,6 +139,20 @@ enum ntb_width { NTB_WIDTH_32 = 32, }; +/** + * enum ntb_default_port - NTB default port number + * @NTB_PORT_PRI_USD: Default port of the NTB_TOPO_PRI/NTB_TOPO_B2B_USD + * topologies + * @NTB_PORT_SEC_DSD: Default port of the NTB_TOPO_SEC/NTB_TOPO_B2B_DSD + * topologies + */ +enum ntb_default_port { + NTB_PORT_PRI_USD, + NTB_PORT_SEC_DSD +}; +#define NTB_DEF_PEER_CNT (1) +#define NTB_DEF_PEER_IDX (0) + /** * struct ntb_client_ops - ntb client operations * @probe: Notify client of a new device. @@ -179,6 +193,10 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) /** * struct ntb_ctx_ops - ntb device operations + * @port_number: See ntb_port_number(). + * @peer_port_count: See ntb_peer_port_count(). + * @peer_port_number: See ntb_peer_port_number(). + * @peer_port_idx: See ntb_peer_port_idx(). * @link_is_up: See ntb_link_is_up(). * @link_enable: See ntb_link_enable(). * @link_disable: See ntb_link_disable(). @@ -212,6 +230,11 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_write: See ntb_peer_spad_write(). */ struct ntb_dev_ops { + int (*port_number)(struct ntb_dev *ntb); + int (*peer_port_count)(struct ntb_dev *ntb); + int (*peer_port_number)(struct ntb_dev *ntb, int pidx); + int (*peer_port_idx)(struct ntb_dev *ntb, int port); + int (*link_is_up)(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width); int (*link_enable)(struct ntb_dev *ntb, @@ -265,6 +288,9 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + !ops->peer_port_count == !ops->port_number && + !ops->peer_port_number == !ops->port_number && + !ops->peer_port_idx == !ops->port_number && ops->link_is_up && ops->link_enable && ops->link_disable && @@ -441,6 +467,136 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_default_port_number() - get the default local port number + * @ntb: NTB device context. + * + * If hardware driver doesn't specify port_number() callback method, the NTB + * is considered with just two ports. So this method returns default local + * port number in compliance with topology. + * + * NOTE Don't call this method directly. The ntb_port_number() function should + * be used instead. + * + * Return: the default local port number + */ +int ntb_default_port_number(struct ntb_dev *ntb); + +/** + * ntb_default_port_count() - get the default number of peer device ports + * @ntb: NTB device context. + * + * By default hardware driver supports just one peer device. + * + * NOTE Don't call this method directly. The ntb_peer_port_count() function + * should be used instead. + * + * Return: the default number of peer ports + */ +int ntb_default_peer_port_count(struct ntb_dev *ntb); + +/** + * ntb_default_peer_port_number() - get the default peer port by given index + * @ntb: NTB device context. + * @idx: Peer port index (should not differ from zero). + * + * By default hardware driver supports just one peer device, so this method + * shall return the corresponding value from enum ntb_default_port. + * + * NOTE Don't call this method directly. The ntb_peer_port_number() function + * should be used instead. + * + * Return: the peer device port or negative value indicating an error + */ +int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx); + +/** + * ntb_default_peer_port_idx() - get the default peer device port index by + * given port number + * @ntb: NTB device context. + * @port: Peer port number (should be one of enum ntb_default_port). + * + * By default hardware driver supports just one peer device, so while + * specified port-argument indicates peer port from enum ntb_default_port, + * the return value shall be zero. + * + * NOTE Don't call this method directly. The ntb_peer_port_idx() function + * should be used instead. + * + * Return: the peer port index or negative value indicating an error + */ +int ntb_default_peer_port_idx(struct ntb_dev *ntb, int port); + +/** + * ntb_port_number() - get the local port number + * @ntb: NTB device context. + * + * Hardware must support at least simple two-ports ntb connection + * + * Return: the local port number + */ +static inline int ntb_port_number(struct ntb_dev *ntb) +{ + if (!ntb->ops->port_number) + return ntb_default_port_number(ntb); + + return ntb->ops->port_number(ntb); +} + +/** + * ntb_peer_port_count() - get the number of peer device ports + * @ntb: NTB device context. + * + * Hardware may support an access to memory of several remote domains + * over multi-port NTB devices. This method returns the number of peers, + * local device can have shared memory with. + * + * Return: the number of peer ports + */ +static inline int ntb_peer_port_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->peer_port_count) + return ntb_default_peer_port_count(ntb); + + return ntb->ops->peer_port_count(ntb); +} + +/** + * ntb_peer_port_number() - get the peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * Peer ports are continuously enumerated by NTB API logic, so this method + * lets to retrieve port real number by its index. + * + * Return: the peer device port or negative value indicating an error + */ +static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx) +{ + if (!ntb->ops->peer_port_number) + return ntb_default_peer_port_number(ntb, pidx); + + return ntb->ops->peer_port_number(ntb, pidx); +} + +/** + * ntb_peer_port_idx() - get the peer device port index by given port number + * @ntb: NTB device context. + * @port: Peer port number. + * + * Inverse operation of ntb_peer_port_number(), so one can get port index + * by specified port number. + * + * Return: the peer port index or negative value indicating an error + */ +static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port) +{ + if (!ntb->ops->peer_port_idx) + return ntb_default_peer_port_idx(ntb, port); + + return ntb->ops->peer_port_idx(ntb, port); +} + /** * ntb_link_is_up() - get the current ntb link state * @ntb: NTB device context. -- cgit v1.2.3 From 4e8c11b7fd29f70eb7af43bae908297689f2c3da Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:15 +0300 Subject: NTB: Alter link-state API to support multi-port devices Multi-port devices permit the NTB connections between multiple domains, so a local device can have NTB link being up with one peer and being down with another. NTB link-state API is appropriately altered to return a bitfield of the link-states between the local device and possible peers. Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d23483bae6f3..b2b2924f5f43 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -235,7 +235,7 @@ struct ntb_dev_ops { int (*peer_port_number)(struct ntb_dev *ntb, int pidx); int (*peer_port_idx)(struct ntb_dev *ntb, int port); - int (*link_is_up)(struct ntb_dev *ntb, + u64 (*link_is_up)(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width); int (*link_enable)(struct ntb_dev *ntb, enum ntb_speed max_speed, enum ntb_width max_width); @@ -607,25 +607,26 @@ static inline int ntb_peer_port_idx(struct ntb_dev *ntb, int port) * state once after every link event. It is safe to query the link state in * the context of the link event callback. * - * Return: One if the link is up, zero if the link is down, otherwise a - * negative value indicating the error number. + * Return: bitfield of indexed ports link state: bit is set/cleared if the + * link is up/down respectively. */ -static inline int ntb_link_is_up(struct ntb_dev *ntb, +static inline u64 ntb_link_is_up(struct ntb_dev *ntb, enum ntb_speed *speed, enum ntb_width *width) { return ntb->ops->link_is_up(ntb, speed, width); } /** - * ntb_link_enable() - enable the link on the secondary side of the ntb + * ntb_link_enable() - enable the local port ntb connection * @ntb: NTB device context. * @max_speed: The maximum link speed expressed as PCIe generation number. * @max_width: The maximum link width expressed as the number of PCIe lanes. * - * Enable the link on the secondary side of the ntb. This can only be done - * from the primary side of the ntb in primary or b2b topology. The ntb device - * should train the link to its maximum speed and width, or the requested speed - * and width, whichever is smaller, if supported. + * Enable the NTB/PCIe link on the local or remote (for bridge-to-bridge + * topology) side of the bridge. If it's supported the ntb device should train + * the link to its maximum speed and width, or the requested speed and width, + * whichever is smaller. Some hardware doesn't support PCIe link training, so + * the last two arguments will be ignored then. * * Return: Zero on success, otherwise an error number. */ @@ -637,14 +638,14 @@ static inline int ntb_link_enable(struct ntb_dev *ntb, } /** - * ntb_link_disable() - disable the link on the secondary side of the ntb + * ntb_link_disable() - disable the local port ntb connection * @ntb: NTB device context. * - * Disable the link on the secondary side of the ntb. This can only be - * done from the primary side of the ntb in primary or b2b topology. The ntb - * device should disable the link. Returning from this call must indicate that - * a barrier has passed, though with no more writes may pass in either - * direction across the link, except if this call returns an error number. + * Disable the link on the local or remote (for b2b topology) of the ntb. + * The ntb device should disable the link. Returning from this call must + * indicate that a barrier has passed, though with no more writes may pass in + * either direction across the link, except if this call returns an error + * number. * * Return: Zero on success, otherwise an error number. */ -- cgit v1.2.3 From 443b9a14ecbe811071467d54d6f2f1182835cc4d Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 11 Jan 2017 03:11:33 +0300 Subject: NTB: Alter MW API to support multi-ports devices Multi-port NTB devices permit to share a memory between all accessible peers. Memory Windows API is altered to correspondingly initialize and map memory windows for such devices: ntb_mw_count(pidx); - number of inbound memory windows, which can be allocated for shared buffer with specified peer device. ntb_mw_get_align(pidx, widx); - get alignment and size restriction parameters to properly allocate inbound memory region. ntb_peer_mw_count(); - get number of outbound memory windows. ntb_peer_mw_get_addr(widx); - get mapping address of an outbound memory window If hardware supports inbound translation configured on the local ntb port: ntb_mw_set_trans(pidx, widx); - set translation address of allocated inbound memory window so a peer device could access it. ntb_mw_clear_trans(pidx, widx); - clear the translation address of an inbound memory window. If hardware supports outbound translation configured on the peer ntb port: ntb_peer_mw_set_trans(pidx, widx); - set translation address of a memory window retrieved from a peer device ntb_peer_mw_clear_trans(pidx, widx); - clear the translation address of an outbound memory window Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 208 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 163 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index b2b2924f5f43..2ea83f91a236 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2016 T-Platforms. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,6 +19,7 @@ * BSD LICENSE * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2016 T-Platforms. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -201,9 +203,13 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @link_enable: See ntb_link_enable(). * @link_disable: See ntb_link_disable(). * @mw_count: See ntb_mw_count(). - * @mw_get_range: See ntb_mw_get_range(). + * @mw_get_align: See ntb_mw_get_align(). * @mw_set_trans: See ntb_mw_set_trans(). * @mw_clear_trans: See ntb_mw_clear_trans(). + * @peer_mw_count: See ntb_peer_mw_count(). + * @peer_mw_get_addr: See ntb_peer_mw_get_addr(). + * @peer_mw_set_trans: See ntb_peer_mw_set_trans(). + * @peer_mw_clear_trans:See ntb_peer_mw_clear_trans(). * @db_is_unsafe: See ntb_db_is_unsafe(). * @db_valid_mask: See ntb_db_valid_mask(). * @db_vector_count: See ntb_db_vector_count(). @@ -241,13 +247,20 @@ struct ntb_dev_ops { enum ntb_speed max_speed, enum ntb_width max_width); int (*link_disable)(struct ntb_dev *ntb); - int (*mw_count)(struct ntb_dev *ntb); - int (*mw_get_range)(struct ntb_dev *ntb, int idx, - phys_addr_t *base, resource_size_t *size, - resource_size_t *align, resource_size_t *align_size); - int (*mw_set_trans)(struct ntb_dev *ntb, int idx, + int (*mw_count)(struct ntb_dev *ntb, int pidx); + int (*mw_get_align)(struct ntb_dev *ntb, int pidx, int widx, + resource_size_t *addr_align, + resource_size_t *size_align, + resource_size_t *size_max); + int (*mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx, dma_addr_t addr, resource_size_t size); - int (*mw_clear_trans)(struct ntb_dev *ntb, int idx); + int (*mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx); + int (*peer_mw_count)(struct ntb_dev *ntb); + int (*peer_mw_get_addr)(struct ntb_dev *ntb, int widx, + phys_addr_t *base, resource_size_t *size); + int (*peer_mw_set_trans)(struct ntb_dev *ntb, int pidx, int widx, + u64 addr, resource_size_t size); + int (*peer_mw_clear_trans)(struct ntb_dev *ntb, int pidx, int widx); int (*db_is_unsafe)(struct ntb_dev *ntb); u64 (*db_valid_mask)(struct ntb_dev *ntb); @@ -295,9 +308,13 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) ops->link_enable && ops->link_disable && ops->mw_count && - ops->mw_get_range && - ops->mw_set_trans && + ops->mw_get_align && + (ops->mw_set_trans || + ops->peer_mw_set_trans) && /* ops->mw_clear_trans && */ + ops->peer_mw_count && + ops->peer_mw_get_addr && + /* ops->peer_mw_clear_trans && */ /* ops->db_is_unsafe && */ ops->db_valid_mask && @@ -655,79 +672,180 @@ static inline int ntb_link_disable(struct ntb_dev *ntb) } /** - * ntb_mw_count() - get the number of memory windows + * ntb_mw_count() - get the number of inbound memory windows, which could + * be created for a specified peer device * @ntb: NTB device context. + * @pidx: Port index of peer device. * * Hardware and topology may support a different number of memory windows. + * Moreover different peer devices can support different number of memory + * windows. Simply speaking this method returns the number of possible inbound + * memory windows to share with specified peer device. * * Return: the number of memory windows. */ -static inline int ntb_mw_count(struct ntb_dev *ntb) +static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx) { - return ntb->ops->mw_count(ntb); + return ntb->ops->mw_count(ntb, pidx); } /** - * ntb_mw_get_range() - get the range of a memory window + * ntb_mw_get_align() - get the restriction parameters of inbound memory window * @ntb: NTB device context. - * @idx: Memory window number. - * @base: OUT - the base address for mapping the memory window - * @size: OUT - the size for mapping the memory window - * @align: OUT - the base alignment for translating the memory window - * @align_size: OUT - the size alignment for translating the memory window - * - * Get the range of a memory window. NULL may be given for any output - * parameter if the value is not needed. The base and size may be used for - * mapping the memory window, to access the peer memory. The alignment and - * size may be used for translating the memory window, for the peer to access - * memory on the local system. - * - * Return: Zero on success, otherwise an error number. + * @pidx: Port index of peer device. + * @widx: Memory window index. + * @addr_align: OUT - the base alignment for translating the memory window + * @size_align: OUT - the size alignment for translating the memory window + * @size_max: OUT - the maximum size of the memory window + * + * Get the alignments of an inbound memory window with specified index. + * NULL may be given for any output parameter if the value is not needed. + * The alignment and size parameters may be used for allocation of proper + * shared memory. + * + * Return: Zero on success, otherwise a negative error number. */ -static inline int ntb_mw_get_range(struct ntb_dev *ntb, int idx, - phys_addr_t *base, resource_size_t *size, - resource_size_t *align, resource_size_t *align_size) +static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx, + resource_size_t *addr_align, + resource_size_t *size_align, + resource_size_t *size_max) { - return ntb->ops->mw_get_range(ntb, idx, base, size, - align, align_size); + return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align, + size_max); } /** - * ntb_mw_set_trans() - set the translation of a memory window + * ntb_mw_set_trans() - set the translation of an inbound memory window * @ntb: NTB device context. - * @idx: Memory window number. - * @addr: The dma address local memory to expose to the peer. + * @pidx: Port index of peer device. + * @widx: Memory window index. + * @addr: The dma address of local memory to expose to the peer. * @size: The size of the local memory to expose to the peer. * * Set the translation of a memory window. The peer may access local memory * through the window starting at the address, up to the size. The address - * must be aligned to the alignment specified by ntb_mw_get_range(). The size - * must be aligned to the size alignment specified by ntb_mw_get_range(). + * and size must be aligned in compliance with restrictions of + * ntb_mw_get_align(). The region size should not exceed the size_max parameter + * of that method. + * + * This method may not be implemented due to the hardware specific memory + * windows interface. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int idx, +static inline int ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, dma_addr_t addr, resource_size_t size) { - return ntb->ops->mw_set_trans(ntb, idx, addr, size); + if (!ntb->ops->mw_set_trans) + return 0; + + return ntb->ops->mw_set_trans(ntb, pidx, widx, addr, size); } /** - * ntb_mw_clear_trans() - clear the translation of a memory window + * ntb_mw_clear_trans() - clear the translation address of an inbound memory + * window * @ntb: NTB device context. - * @idx: Memory window number. + * @pidx: Port index of peer device. + * @widx: Memory window index. * - * Clear the translation of a memory window. The peer may no longer access - * local memory through the window. + * Clear the translation of an inbound memory window. The peer may no longer + * access local memory through the window. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx) +static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int pidx, int widx) { if (!ntb->ops->mw_clear_trans) - return ntb->ops->mw_set_trans(ntb, idx, 0, 0); + return ntb_mw_set_trans(ntb, pidx, widx, 0, 0); + + return ntb->ops->mw_clear_trans(ntb, pidx, widx); +} + +/** + * ntb_peer_mw_count() - get the number of outbound memory windows, which could + * be mapped to access a shared memory + * @ntb: NTB device context. + * + * Hardware and topology may support a different number of memory windows. + * This method returns the number of outbound memory windows supported by + * local device. + * + * Return: the number of memory windows. + */ +static inline int ntb_peer_mw_count(struct ntb_dev *ntb) +{ + return ntb->ops->peer_mw_count(ntb); +} + +/** + * ntb_peer_mw_get_addr() - get map address of an outbound memory window + * @ntb: NTB device context. + * @widx: Memory window index (within ntb_peer_mw_count() return value). + * @base: OUT - the base address of mapping region. + * @size: OUT - the size of mapping region. + * + * Get base and size of memory region to map. NULL may be given for any output + * parameter if the value is not needed. The base and size may be used for + * mapping the memory window, to access the peer memory. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_peer_mw_get_addr(struct ntb_dev *ntb, int widx, + phys_addr_t *base, resource_size_t *size) +{ + return ntb->ops->peer_mw_get_addr(ntb, widx, base, size); +} + +/** + * ntb_peer_mw_set_trans() - set a translation address of a memory window + * retrieved from a peer device + * @ntb: NTB device context. + * @pidx: Port index of peer device the translation address received from. + * @widx: Memory window index. + * @addr: The dma address of the shared memory to access. + * @size: The size of the shared memory to access. + * + * Set the translation of an outbound memory window. The local device may + * access shared memory allocated by a peer device sent the address. + * + * This method may not be implemented due to the hardware specific memory + * windows interface, so a translation address can be only set on the side, + * where shared memory (inbound memory windows) is allocated. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, + u64 addr, resource_size_t size) +{ + if (!ntb->ops->peer_mw_set_trans) + return 0; + + return ntb->ops->peer_mw_set_trans(ntb, pidx, widx, addr, size); +} + +/** + * ntb_peer_mw_clear_trans() - clear the translation address of an outbound + * memory window + * @ntb: NTB device context. + * @pidx: Port index of peer device. + * @widx: Memory window index. + * + * Clear the translation of a outbound memory window. The local device may no + * longer access a shared memory through the window. + * + * This method may not be implemented due to the hardware specific memory + * windows interface. + * + * Return: Zero on success, otherwise an error number. + */ +static inline int ntb_peer_mw_clear_trans(struct ntb_dev *ntb, int pidx, + int widx) +{ + if (!ntb->ops->peer_mw_clear_trans) + return ntb_peer_mw_set_trans(ntb, pidx, widx, 0, 0); - return ntb->ops->mw_clear_trans(ntb, idx); + return ntb->ops->peer_mw_clear_trans(ntb, pidx, widx); } /** -- cgit v1.2.3 From d67288a39584daad11edee9b03d53264ba147453 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 11 Jan 2017 03:13:20 +0300 Subject: NTB: Alter Scratchpads API to support multi-ports devices Even though there is no any real NTB hardware, which would have both more than two ports and Scratchpad registers, it is logically correct to have Scratchpad API accepting a peer port index as well. Intel/AMD drivers utilize Primary and Secondary topology to split Scratchpad between connected root devices. Since port-index API introduced, Intel/AMD NTB hardware drivers can use device port to determine which Scratchpad registers actually belong to local and peer devices. The same approach can be used if some potential hardware in future will be multi-port and have some set of Scratchpads. Here are the brief of changes in the API: ntb_spad_count() - return number of Scratchpads per each port ntb_peer_spad_addr(pidx, sidx) - address of Scratchpad register of the peer device with pidx-index ntb_peer_spad_read(pidx, sidx) - read specified Scratchpad register of the peer with pidx-index ntb_peer_spad_write(pidx, sidx) - write data to Scratchpad register of the peer with pidx-index Since there is hardware which doesn't support Scratchpad registers, the corresponding API methods are now made optional. Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 73 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 2ea83f91a236..4e3cd56af732 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -288,13 +288,14 @@ struct ntb_dev_ops { int (*spad_is_unsafe)(struct ntb_dev *ntb); int (*spad_count)(struct ntb_dev *ntb); - u32 (*spad_read)(struct ntb_dev *ntb, int idx); - int (*spad_write)(struct ntb_dev *ntb, int idx, u32 val); + u32 (*spad_read)(struct ntb_dev *ntb, int sidx); + int (*spad_write)(struct ntb_dev *ntb, int sidx, u32 val); - int (*peer_spad_addr)(struct ntb_dev *ntb, int idx, + int (*peer_spad_addr)(struct ntb_dev *ntb, int pidx, int sidx, phys_addr_t *spad_addr); - u32 (*peer_spad_read)(struct ntb_dev *ntb, int idx); - int (*peer_spad_write)(struct ntb_dev *ntb, int idx, u32 val); + u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx); + int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx, + u32 val); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -335,13 +336,12 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* ops->peer_db_read_mask && */ /* ops->peer_db_set_mask && */ /* ops->peer_db_clear_mask && */ - /* ops->spad_is_unsafe && */ - ops->spad_count && - ops->spad_read && - ops->spad_write && - /* ops->peer_spad_addr && */ - /* ops->peer_spad_read && */ - ops->peer_spad_write && + /* !ops->spad_is_unsafe == !ops->spad_count && */ + !ops->spad_read == !ops->spad_count && + !ops->spad_write == !ops->spad_count && + /* !ops->peer_spad_addr == !ops->spad_count && */ + /* !ops->peer_spad_read == !ops->spad_count && */ + !ops->peer_spad_write == !ops->spad_count && 1; } @@ -1176,47 +1176,58 @@ static inline int ntb_spad_is_unsafe(struct ntb_dev *ntb) * @ntb: NTB device context. * * Hardware and topology may support a different number of scratchpads. + * Although it must be the same for all ports per NTB device. * * Return: the number of scratchpads. */ static inline int ntb_spad_count(struct ntb_dev *ntb) { + if (!ntb->ops->spad_count) + return 0; + return ntb->ops->spad_count(ntb); } /** * ntb_spad_read() - read the local scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @sidx: Scratchpad index. * * Read the local scratchpad register, and return the value. * * Return: The value of the local scratchpad register. */ -static inline u32 ntb_spad_read(struct ntb_dev *ntb, int idx) +static inline u32 ntb_spad_read(struct ntb_dev *ntb, int sidx) { - return ntb->ops->spad_read(ntb, idx); + if (!ntb->ops->spad_read) + return ~(u32)0; + + return ntb->ops->spad_read(ntb, sidx); } /** * ntb_spad_write() - write the local scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @sidx: Scratchpad index. * @val: Scratchpad value. * * Write the value to the local scratchpad register. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) +static inline int ntb_spad_write(struct ntb_dev *ntb, int sidx, u32 val) { - return ntb->ops->spad_write(ntb, idx, val); + if (!ntb->ops->spad_write) + return -EINVAL; + + return ntb->ops->spad_write(ntb, sidx, val); } /** * ntb_peer_spad_addr() - address of the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * @spad_addr: OUT - The address of the peer scratchpad register. * * Return the address of the peer doorbell register. This may be used, for @@ -1224,45 +1235,51 @@ static inline int ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val) * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int idx, +static inline int ntb_peer_spad_addr(struct ntb_dev *ntb, int pidx, int sidx, phys_addr_t *spad_addr) { if (!ntb->ops->peer_spad_addr) return -EINVAL; - return ntb->ops->peer_spad_addr(ntb, idx, spad_addr); + return ntb->ops->peer_spad_addr(ntb, pidx, sidx, spad_addr); } /** * ntb_peer_spad_read() - read the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * * Read the peer scratchpad register, and return the value. * * Return: The value of the local scratchpad register. */ -static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int idx) +static inline u32 ntb_peer_spad_read(struct ntb_dev *ntb, int pidx, int sidx) { if (!ntb->ops->peer_spad_read) - return 0; + return ~(u32)0; - return ntb->ops->peer_spad_read(ntb, idx); + return ntb->ops->peer_spad_read(ntb, pidx, sidx); } /** * ntb_peer_spad_write() - write the peer scratchpad register * @ntb: NTB device context. - * @idx: Scratchpad index. + * @pidx: Port index of peer device. + * @sidx: Scratchpad index. * @val: Scratchpad value. * * Write the value to the peer scratchpad register. * * Return: Zero on success, otherwise an error number. */ -static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val) +static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, + u32 val) { - return ntb->ops->peer_spad_write(ntb, idx, val); + if (!ntb->ops->peer_spad_write) + return -EINVAL; + + return ntb->ops->peer_spad_write(ntb, pidx, sidx, val); } #endif -- cgit v1.2.3 From bc3e49adc279c5505d6df8dd8c7fca45d6d3d21a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 20 Dec 2016 12:48:20 +0300 Subject: NTB: Add Messaging NTB API Some IDT NTB-capable PCIe-switches have message registers to communicate with peer devices. This patch adds new NTB API callback methods, which can be used to utilize these registers functionality: ntb_msg_count(); - get number of message registers ntb_msg_inbits(); - get bitfield of inbound message registers status ntb_msg_outbits(); - get bitfield of outbound message registers status ntb_msg_read_sts(); - read the inbound and outbound message registers status ntb_msg_clear_sts(); - clear status bits of message registers ntb_msg_set_mask(); - mask interrupts raised by status bits of message registers. ntb_msg_clear_mask(); - clear interrupts mask bits of message registers ntb_msg_read(midx, *pidx); - read message register with specified index, additionally getting peer port index which data received from ntb_msg_write(midx, pidx); - write data to the specified message register sending it to the passed peer device connected over a pidx port ntb_msg_event(); - notify driver context of a new message event Of course there is hardware which doesn't support Message registers, so this API is made optional. Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 4e3cd56af732..d59688f91618 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -178,10 +178,12 @@ static inline int ntb_client_ops_is_valid(const struct ntb_client_ops *ops) * struct ntb_ctx_ops - ntb driver context operations * @link_event: See ntb_link_event(). * @db_event: See ntb_db_event(). + * @msg_event: See ntb_msg_event(). */ struct ntb_ctx_ops { void (*link_event)(void *ctx); void (*db_event)(void *ctx, int db_vector); + void (*msg_event)(void *ctx); }; static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) @@ -190,6 +192,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) return /* ops->link_event && */ /* ops->db_event && */ + /* ops->msg_event && */ 1; } @@ -234,6 +237,15 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) * @peer_spad_addr: See ntb_peer_spad_addr(). * @peer_spad_read: See ntb_peer_spad_read(). * @peer_spad_write: See ntb_peer_spad_write(). + * @msg_count: See ntb_msg_count(). + * @msg_inbits: See ntb_msg_inbits(). + * @msg_outbits: See ntb_msg_outbits(). + * @msg_read_sts: See ntb_msg_read_sts(). + * @msg_clear_sts: See ntb_msg_clear_sts(). + * @msg_set_mask: See ntb_msg_set_mask(). + * @msg_clear_mask: See ntb_msg_clear_mask(). + * @msg_read: See ntb_msg_read(). + * @msg_write: See ntb_msg_write(). */ struct ntb_dev_ops { int (*port_number)(struct ntb_dev *ntb); @@ -296,6 +308,16 @@ struct ntb_dev_ops { u32 (*peer_spad_read)(struct ntb_dev *ntb, int pidx, int sidx); int (*peer_spad_write)(struct ntb_dev *ntb, int pidx, int sidx, u32 val); + + int (*msg_count)(struct ntb_dev *ntb); + u64 (*msg_inbits)(struct ntb_dev *ntb); + u64 (*msg_outbits)(struct ntb_dev *ntb); + u64 (*msg_read_sts)(struct ntb_dev *ntb); + int (*msg_clear_sts)(struct ntb_dev *ntb, u64 sts_bits); + int (*msg_set_mask)(struct ntb_dev *ntb, u64 mask_bits); + int (*msg_clear_mask)(struct ntb_dev *ntb, u64 mask_bits); + int (*msg_read)(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg); + int (*msg_write)(struct ntb_dev *ntb, int midx, int pidx, u32 msg); }; static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) @@ -342,6 +364,15 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->peer_spad_addr == !ops->spad_count && */ /* !ops->peer_spad_read == !ops->spad_count && */ !ops->peer_spad_write == !ops->spad_count && + + !ops->msg_inbits == !ops->msg_count && + !ops->msg_outbits == !ops->msg_count && + !ops->msg_read_sts == !ops->msg_count && + !ops->msg_clear_sts == !ops->msg_count && + /* !ops->msg_set_mask == !ops->msg_count && */ + /* !ops->msg_clear_mask == !ops->msg_count && */ + !ops->msg_read == !ops->msg_count && + !ops->msg_write == !ops->msg_count && 1; } @@ -484,6 +515,18 @@ void ntb_link_event(struct ntb_dev *ntb); */ void ntb_db_event(struct ntb_dev *ntb, int vector); +/** + * ntb_msg_event() - notify driver context of a message event + * @ntb: NTB device context. + * + * Notify the driver context of a message event. If hardware supports + * message registers, this event indicates, that a new message arrived in + * some incoming message register or last sent message couldn't be delivered. + * The events can be masked/unmasked by the methods ntb_msg_set_mask() and + * ntb_msg_clear_mask(). + */ +void ntb_msg_event(struct ntb_dev *ntb); + /** * ntb_default_port_number() - get the default local port number * @ntb: NTB device context. @@ -1282,4 +1325,166 @@ static inline int ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, int sidx, return ntb->ops->peer_spad_write(ntb, pidx, sidx, val); } +/** + * ntb_msg_count() - get the number of message registers + * @ntb: NTB device context. + * + * Hardware may support a different number of message registers. + * + * Return: the number of message registers. + */ +static inline int ntb_msg_count(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_count) + return 0; + + return ntb->ops->msg_count(ntb); +} + +/** + * ntb_msg_inbits() - get a bitfield of inbound message registers status + * @ntb: NTB device context. + * + * The method returns the bitfield of status and mask registers, which related + * to inbound message registers. + * + * Return: bitfield of inbound message registers. + */ +static inline u64 ntb_msg_inbits(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_inbits) + return 0; + + return ntb->ops->msg_inbits(ntb); +} + +/** + * ntb_msg_outbits() - get a bitfield of outbound message registers status + * @ntb: NTB device context. + * + * The method returns the bitfield of status and mask registers, which related + * to outbound message registers. + * + * Return: bitfield of outbound message registers. + */ +static inline u64 ntb_msg_outbits(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_outbits) + return 0; + + return ntb->ops->msg_outbits(ntb); +} + +/** + * ntb_msg_read_sts() - read the message registers status + * @ntb: NTB device context. + * + * Read the status of message register. Inbound and outbound message registers + * related bits can be filtered by masks retrieved from ntb_msg_inbits() and + * ntb_msg_outbits(). + * + * Return: status bits of message registers + */ +static inline u64 ntb_msg_read_sts(struct ntb_dev *ntb) +{ + if (!ntb->ops->msg_read_sts) + return 0; + + return ntb->ops->msg_read_sts(ntb); +} + +/** + * ntb_msg_clear_sts() - clear status bits of message registers + * @ntb: NTB device context. + * @sts_bits: Status bits to clear. + * + * Clear bits in the status register. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_clear_sts(struct ntb_dev *ntb, u64 sts_bits) +{ + if (!ntb->ops->msg_clear_sts) + return -EINVAL; + + return ntb->ops->msg_clear_sts(ntb, sts_bits); +} + +/** + * ntb_msg_set_mask() - set mask of message register status bits + * @ntb: NTB device context. + * @mask_bits: Mask bits. + * + * Mask the message registers status bits from raising the message event. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_set_mask(struct ntb_dev *ntb, u64 mask_bits) +{ + if (!ntb->ops->msg_set_mask) + return -EINVAL; + + return ntb->ops->msg_set_mask(ntb, mask_bits); +} + +/** + * ntb_msg_clear_mask() - clear message registers mask + * @ntb: NTB device context. + * @mask_bits: Mask bits to clear. + * + * Clear bits in the message events mask register. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits) +{ + if (!ntb->ops->msg_clear_mask) + return -EINVAL; + + return ntb->ops->msg_clear_mask(ntb, mask_bits); +} + +/** + * ntb_msg_read() - read message register with specified index + * @ntb: NTB device context. + * @midx: Message register index + * @pidx: OUT - Port index of peer device a message retrieved from + * @msg: OUT - Data + * + * Read data from the specified message register. Source port index of a + * message is retrieved as well. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, + u32 *msg) +{ + if (!ntb->ops->msg_read) + return -EINVAL; + + return ntb->ops->msg_read(ntb, midx, pidx, msg); +} + +/** + * ntb_msg_write() - write data to the specified message register + * @ntb: NTB device context. + * @midx: Message register index + * @pidx: Port index of peer device a message being sent to + * @msg: Data to send + * + * Send data to a specified peer device using the defined message register. + * Message event can be raised if the midx registers isn't empty while + * calling this method and the corresponding interrupt isn't masked. + * + * Return: Zero on success, otherwise a negative error number. + */ +static inline int ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, + u32 msg) +{ + if (!ntb->ops->msg_write) + return -EINVAL; + + return ntb->ops->msg_write(ntb, midx, pidx, msg); +} + #endif -- cgit v1.2.3 From 85dce3aaae98a8440f4a1a2404bcbab890574b46 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 14 Dec 2016 02:49:20 +0300 Subject: NTB: Add PCIe Gen4 link speed Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index d59688f91618..a6f569727845 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -108,6 +108,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo) * @NTB_SPEED_GEN1: Link is trained to gen1 speed. * @NTB_SPEED_GEN2: Link is trained to gen2 speed. * @NTB_SPEED_GEN3: Link is trained to gen3 speed. + * @NTB_SPEED_GEN4: Link is trained to gen4 speed. */ enum ntb_speed { NTB_SPEED_AUTO = -1, @@ -115,6 +116,7 @@ enum ntb_speed { NTB_SPEED_GEN1 = 1, NTB_SPEED_GEN2 = 2, NTB_SPEED_GEN3 = 3, + NTB_SPEED_GEN4 = 4 }; /** -- cgit v1.2.3 From 3c69f5d6731c43a5b6b9e78b385948e8d76460be Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Tue, 20 Dec 2016 12:50:09 +0300 Subject: NTB: Add ntb.h comments Signed-off-by: Serge Semin Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index a6f569727845..609e232c00da 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -326,12 +326,17 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) { /* commented callbacks are not required: */ return + /* Port operations are required for multiport devices */ !ops->peer_port_count == !ops->port_number && !ops->peer_port_number == !ops->port_number && !ops->peer_port_idx == !ops->port_number && + + /* Link operations are required */ ops->link_is_up && ops->link_enable && ops->link_disable && + + /* One or both MW interfaces should be developed */ ops->mw_count && ops->mw_get_align && (ops->mw_set_trans || @@ -341,12 +346,11 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) ops->peer_mw_get_addr && /* ops->peer_mw_clear_trans && */ + /* Doorbell operations are mostly required */ /* ops->db_is_unsafe && */ ops->db_valid_mask && - /* both set, or both unset */ - (!ops->db_vector_count == !ops->db_vector_mask) && - + (!ops->db_vector_count == !ops->db_vector_mask) && ops->db_read && /* ops->db_set && */ ops->db_clear && @@ -360,6 +364,8 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* ops->peer_db_read_mask && */ /* ops->peer_db_set_mask && */ /* ops->peer_db_clear_mask && */ + + /* Scrachpads interface is optional */ /* !ops->spad_is_unsafe == !ops->spad_count && */ !ops->spad_read == !ops->spad_count && !ops->spad_write == !ops->spad_count && @@ -367,6 +373,7 @@ static inline int ntb_dev_ops_is_valid(const struct ntb_dev_ops *ops) /* !ops->peer_spad_read == !ops->spad_count && */ !ops->peer_spad_write == !ops->spad_count && + /* Messaging interface is optional */ !ops->msg_inbits == !ops->msg_count && !ops->msg_outbits == !ops->msg_count && !ops->msg_read_sts == !ops->msg_count && @@ -387,13 +394,12 @@ struct ntb_client { struct device_driver drv; const struct ntb_client_ops ops; }; - #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) /** * struct ntb_device - ntb device * @dev: Linux device object. - * @pdev: Pci device entry of the ntb. + * @pdev: PCI device entry of the ntb. * @topo: Detected topology of the ntb. * @ops: See &ntb_dev_ops. * @ctx: See &ntb_ctx_ops. @@ -414,7 +420,6 @@ struct ntb_dev { /* block unregister until device is fully released */ struct completion released; }; - #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) /** @@ -511,7 +516,7 @@ void ntb_link_event(struct ntb_dev *ntb); * multiple interrupt vectors for doorbells, the vector number indicates which * vector received the interrupt. The vector number is relative to the first * vector used for doorbells, starting at zero, and must be less than - ** ntb_db_vector_count(). The driver may call ntb_db_read() to check which + * ntb_db_vector_count(). The driver may call ntb_db_read() to check which * doorbell bits need service, and ntb_db_vector_mask() to determine which of * those bits are associated with the vector number. */ -- cgit v1.2.3 From 5465e7d3b99bbaa823ae4f8e538543e7d6cdc530 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 3 Jun 2017 06:55:50 -0700 Subject: target: Add TARGET_SCF_LOOKUP_LUN_FROM_TAG support for ABORT_TASK This patch introduces support in target_submit_tmr() for locating a unpacked_lun from an existing se_cmd->tag during ABORT_TASK. When TARGET_SCF_LOOKUP_LUN_FROM_TAG is set, target_submit_tmr() will do the extra lookup via target_lookup_lun_from_tag() and subsequently invoke transport_lookup_tmr_lun() so a proper percpu se_lun->lun_ref is taken before workqueue dispatch into se_device->tmr_wq happens. Aside from the extra target_lookup_lun_from_tag(), the existing code-path remains unchanged. Reviewed-by: Himanshu Madhani Reviewed-by: Quinn Tran Cc: Mike Christie Cc: Hannes Reinecke Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 47d9f381209f..f835528e424e 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -188,7 +188,8 @@ enum target_sc_flags_table { TARGET_SCF_BIDI_OP = 0x01, TARGET_SCF_ACK_KREF = 0x02, TARGET_SCF_UNKNOWN_SIZE = 0x04, - TARGET_SCF_USE_CPUID = 0x08, + TARGET_SCF_USE_CPUID = 0x08, + TARGET_SCF_LOOKUP_LUN_FROM_TAG = 0x10, }; /* fabric independent task management function values */ -- cgit v1.2.3 From 9f2f342892e15f8600939ec8d06caf963ccff880 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 May 2017 16:48:23 -0700 Subject: target: Remove se_device.dev_list The last user of se_device.dev_list was removed through commit 0fd97ccf45be ("target: kill struct se_subsystem_dev"). Hence also remove se_device.dev_list. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index f835528e424e..a3af69fcf75e 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -816,8 +816,6 @@ struct se_device { unsigned char udev_path[SE_UDEV_PATH_LEN]; /* Pointer to template of function pointers for transport */ const struct target_backend_ops *transport; - /* Linked list for struct se_hba struct se_device list */ - struct list_head dev_list; struct se_lun xcopy_lun; /* Protection Information */ int prot_length; -- cgit v1.2.3 From a85d667e58bddf73be84d1981b41eaac985ed216 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 May 2017 16:48:27 -0700 Subject: target: Use {get,put}_unaligned_be*() instead of open coding these functions Introduce the function get_unaligned_be24(). Use {get,put}_unaligned_be*() where appropriate. This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index e475531565fd..b76071161cdc 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -2,6 +2,7 @@ #define TARGET_CORE_BACKEND_H #include +#include #include #define TRANSPORT_FLAG_PASSTHROUGH 0x1 @@ -109,4 +110,11 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, struct request_queue *q); + +/* Only use get_unaligned_be24() if reading p - 1 is allowed. */ +static inline uint32_t get_unaligned_be24(const uint8_t *const p) +{ + return get_unaligned_be32(p - 1) & 0xffffffU; +} + #endif /* TARGET_CORE_BACKEND_H */ -- cgit v1.2.3 From 03db016a1bf2d35f41c08aad2ca4f4f18eeda4be Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 2 Jun 2017 23:33:56 -0700 Subject: iscsi-target: Kill left-over iscsi_target_do_cleanup With commit 25cdda95fda7 in place to address the initial login PDU asynchronous socket close OOPs, go ahead and kill off the left-over iscsi_target_do_cleanup() and ->login_cleanup_work. Reported-by: Mike Christie Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/iscsi/iscsi_target_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 5f17fb770477..7948fc68286b 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -560,7 +560,6 @@ struct iscsi_conn { #define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; - struct delayed_work login_cleanup_work; struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; -- cgit v1.2.3 From c00e6220231542c6409780a3e9bfa44be7d94f3a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 May 2017 16:48:28 -0700 Subject: target: Introduce a function that shows the command state Introduce target_show_cmd() and use it where appropriate. If transport_wait_for_tasks() takes too long, make it show the state of the command it is waiting for. (Add missing brackets around multi-line conditions - nab) Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- include/target/target_core_fabric.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index d7dd1427fe0d..33d2e3e5773c 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -160,6 +160,7 @@ int target_get_sess_cmd(struct se_cmd *, bool); int target_put_sess_cmd(struct se_cmd *); void target_sess_cmd_list_set_waiting(struct se_session *); void target_wait_for_sess_cmds(struct se_session *); +void target_show_cmd(const char *pfx, struct se_cmd *cmd); int core_alua_check_nonop_delay(struct se_cmd *); -- cgit v1.2.3 From 1068be7bd4b05ca41a6a8de724f52a9c87861412 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Tue, 6 Jun 2017 09:28:49 -0500 Subject: tcmu: Add netlink for device reconfiguration This gives tcmu the ability to handle events that can cause reconfiguration, such as resize, path changes, write_cache, etc... Signed-off-by: Bryant G. Ly Reviewed-By: Mike Christie Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index af17b4154ef6..403a61faada0 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -130,6 +130,7 @@ enum tcmu_genl_cmd { TCMU_CMD_UNSPEC, TCMU_CMD_ADDED_DEVICE, TCMU_CMD_REMOVED_DEVICE, + TCMU_CMD_RECONFIG_DEVICE, __TCMU_CMD_MAX, }; #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1) -- cgit v1.2.3 From 8a45885c1514cdae2ee64b5ac03ffc00a1a8a9d7 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Tue, 6 Jun 2017 09:28:52 -0500 Subject: tcmu: Add Type of reconfig into netlink This patch adds more info about the attribute being changed, so that usersapce can easily figure out what is happening. Signed-off-by: Bryant G. Ly Reviewed-By: Mike Christie Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 403a61faada0..5b00e3500005 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -139,8 +139,16 @@ enum tcmu_genl_attr { TCMU_ATTR_UNSPEC, TCMU_ATTR_DEVICE, TCMU_ATTR_MINOR, + TCMU_ATTR_TYPE, __TCMU_ATTR_MAX, }; #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1) +enum tcmu_reconfig_types { + NO_RECONFIG, + CONFIG_PATH, + CONFIG_SIZE, + CONFIG_WRITECACHE, +}; + #endif -- cgit v1.2.3 From 2d76443e02f260d7a5bd0ede1851ae5534f0c68d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 12 Jun 2017 01:34:28 -0500 Subject: tcmu: reconfigure netlink attr changes 1. TCMU_ATTR_TYPE is too generic when it describes only the reconfiguration type, so rename to TCMU_ATTR_RECONFIG_TYPE. 2. Only return the reconfig type when it is a TCMU_CMD_RECONFIG_DEVICE command. 3. CONFIG_* type is not needed. We can pass the value along with an ATTR to userspace, so it does not need to read sysfs/configfs. 4. Fix leak in tcmu_dev_path_store and rename to dev_config to reflect it is more than just a path that can be changed. 6. Don't update kernel struct value if netlink sending fails. Signed-off-by: Mike Christie Reviewed-by: "Bryant G. Ly" Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 5b00e3500005..4bfc9a1b635c 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -139,16 +139,12 @@ enum tcmu_genl_attr { TCMU_ATTR_UNSPEC, TCMU_ATTR_DEVICE, TCMU_ATTR_MINOR, - TCMU_ATTR_TYPE, + TCMU_ATTR_PAD, + TCMU_ATTR_DEV_CFG, + TCMU_ATTR_DEV_SIZE, + TCMU_ATTR_WRITECACHE, __TCMU_ATTR_MAX, }; #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1) -enum tcmu_reconfig_types { - NO_RECONFIG, - CONFIG_PATH, - CONFIG_SIZE, - CONFIG_WRITECACHE, -}; - #endif -- cgit v1.2.3 From 926347061ef1f4d3873829fd1960c6e4b965aa9f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 23 Jun 2017 01:18:12 -0500 Subject: target: break up free_device callback With this patch free_device is now used to free what is allocated in the alloc_device callback and destroy_device tears down the resources that are setup in the configure_device callback. This patch will be needed in the next patch where tcmu needs to be able to look up the device in the destroy callback. Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b76071161cdc..3dbcacd7e8d7 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -30,6 +30,7 @@ struct target_backend_ops { struct se_device *(*alloc_device)(struct se_hba *, const char *); int (*configure_device)(struct se_device *); + void (*destroy_device)(struct se_device *); void (*free_device)(struct se_device *device); ssize_t (*set_configfs_dev_params)(struct se_device *, -- cgit v1.2.3 From 0a5eee647b78e53da05e081362f42a11b4b674eb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 23 Jun 2017 01:18:13 -0500 Subject: target: use idr for se_device dev index In the next patches we will add tcmu netlink support that allows userspace to send commands to target_core_user. To execute operations on a se_device/tcmu_dev we need to be able to look up a dev by any old id. This patch replaces the se_device->dev_index with a idr created id. The next patches will also remove the g_device_list and replace it with the idr. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index a3af69fcf75e..51a92f17352c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -219,7 +219,6 @@ enum tcm_tmrsp_table { */ typedef enum { SCSI_INST_INDEX, - SCSI_DEVICE_INDEX, SCSI_AUTH_INTR_INDEX, SCSI_INDEX_TYPE_MAX } scsi_index_t; -- cgit v1.2.3 From 85441e6b8c97964a6da72135dc21f708adbdc4d8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 23 Jun 2017 01:18:14 -0500 Subject: target: add helper to find se_device by dev_index This adds a helper to find a se_device by dev_index. It will be used in the next patches so tcmu's netlink interface can execute commands on specific devices. Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 3dbcacd7e8d7..1f2b7007f2df 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -106,6 +106,8 @@ bool target_lun_is_rdonly(struct se_cmd *); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); +struct se_device *target_find_device(int id, bool do_depend); + bool target_sense_desc_format(struct se_device *dev); sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, -- cgit v1.2.3 From b3af66e24393f03ef81db17a11387d9e6174bd01 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 23 Jun 2017 01:18:15 -0500 Subject: tcmu: perfom device add, del and reconfig synchronously This makes the device add, del reconfig operations sync. It fixes the issue where for add and reconfig, we do not know if userspace successfully completely the operation, so we leave invalid kernel structs or report incorrect status for the config/reconfig operations. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 4bfc9a1b635c..24a1c4ec2248 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -131,6 +131,10 @@ enum tcmu_genl_cmd { TCMU_CMD_ADDED_DEVICE, TCMU_CMD_REMOVED_DEVICE, TCMU_CMD_RECONFIG_DEVICE, + TCMU_CMD_ADDED_DEVICE_DONE, + TCMU_CMD_REMOVED_DEVICE_DONE, + TCMU_CMD_RECONFIG_DEVICE_DONE, + TCMU_CMD_SET_FEATURES, __TCMU_CMD_MAX, }; #define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1) @@ -143,6 +147,9 @@ enum tcmu_genl_attr { TCMU_ATTR_DEV_CFG, TCMU_ATTR_DEV_SIZE, TCMU_ATTR_WRITECACHE, + TCMU_ATTR_CMD_STATUS, + TCMU_ATTR_DEVICE_ID, + TCMU_ATTR_SUPP_KERN_CMD_REPLY, __TCMU_ATTR_MAX, }; #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1) -- cgit v1.2.3 From be50f538e9a5081c61a78faf58c5591c94064633 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 23 Jun 2017 01:18:18 -0500 Subject: target: remove g_device_list g_device_list is no longer needed because we now use the idr code for lookups and seaches. Signed-off-by: Mike Christie Reviewed-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- include/target/target_core_base.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 51a92f17352c..516764febeb7 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -796,7 +796,6 @@ struct se_device { struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; - struct list_head g_dev_node; /* Pointer to associated SE HBA */ struct se_hba *se_hba; /* T10 Inquiry and VPD WWN Information */ -- cgit v1.2.3 From c6d66aba98a39cfed206c5c61f0a604ba09b26ce Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 31 May 2017 15:52:39 -0500 Subject: target: add helper to copy sense to se_cmd buffer This adds a helper to copy sense from backend module buffer to the se_cmd's sense buffer. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 1f2b7007f2df..3757f5f54e03 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -73,6 +73,8 @@ void target_backend_unregister(const struct target_backend_ops *); void target_complete_cmd(struct se_cmd *, u8); void target_complete_cmd_with_length(struct se_cmd *, u8, int); +void transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *); + sense_reason_t spc_parse_cdb(struct se_cmd *cmd, unsigned int *size); sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd); sense_reason_t spc_emulate_inquiry_std(struct se_cmd *, unsigned char *); -- cgit v1.2.3 From 1a444175486026c1a280507f8d82094909acddd2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 31 May 2017 15:52:42 -0500 Subject: target: remove transport_complete transport_complete is no longer used, so drop the code. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- include/target/target_core_backend.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 3757f5f54e03..e150e391878b 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -37,10 +37,6 @@ struct target_backend_ops { const char *, ssize_t); ssize_t (*show_configfs_dev_params)(struct se_device *, char *); - void (*transport_complete)(struct se_cmd *cmd, - struct scatterlist *, - unsigned char *); - sense_reason_t (*parse_cdb)(struct se_cmd *cmd); u32 (*get_device_type)(struct se_device *); sector_t (*get_blocks)(struct se_device *); -- cgit v1.2.3 From e5dc9a7055c98bcd7b03f9735d5f2ec2b7f0d897 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 28 Jun 2017 14:58:56 +0900 Subject: target: Use macro for WRITE_VERIFY_32 operation codes Add WRITE_VERIFY_32 definition to scsi prototypes and use this macro definition isntead of the hard coded value. (Drop WRITE_VERIFY_16 that's already part of another patch - nab) Signed-off-by: Damien Le Moal Signed-off-by: Nicholas Bellinger --- include/scsi/scsi_proto.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index ce78ec8e367d..5e3fe037938e 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -161,6 +161,7 @@ #define READ_32 0x09 #define VERIFY_32 0x0a #define WRITE_32 0x0b +#define WRITE_VERIFY_32 0x0c #define WRITE_SAME_32 0x0d /* Values for T10/04-262r7 */ -- cgit v1.2.3 From 0e4524a5d341e719e8ee9ee7db5d58e2c5a4c10e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 14:44:28 +0200 Subject: KVM: mark vcpu->pid pointer as rcu protected We do use rcu to protect the pid pointer. Mark it as such and adopt all code to use the proper access methods. This was detected by sparse. "virt/kvm/kvm_main.c:2248:15: error: incompatible types in comparison expression (different address spaces)" Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0b50e7b35ed4..bcd37b855c66 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,7 +234,7 @@ struct kvm_vcpu { int guest_fpu_loaded, guest_xcr0_loaded; struct swait_queue_head wq; - struct pid *pid; + struct pid __rcu *pid; int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; -- cgit v1.2.3 From 3068a254d5519cd5116f61297462da6d1aa84c20 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Jul 2017 11:42:00 +0200 Subject: rtc: introduce new registration method Introduce rtc_register_device() to register an already allocated and initialized struct rtc_device. It automatically sets up the owner and the two steps allocation/registration will allow to remove race conditions in the IRQ handling of some driver. It also allows to properly extend the core without adding more arguments to rtc_device_register(). Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index d354f56e0cf5..8e4a5f44f59e 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -142,6 +142,8 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; + bool registered; + #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; @@ -163,6 +165,8 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev, const char *name, const struct rtc_class_ops *ops, struct module *owner); +struct rtc_device *devm_rtc_allocate_device(struct device *dev); +int __rtc_register_device(struct module *owner, struct rtc_device *rtc); extern void rtc_device_unregister(struct rtc_device *rtc); extern void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc); @@ -218,6 +222,9 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +#define rtc_register_device(device) \ + __rtc_register_device(THIS_MODULE, device) + #ifdef CONFIG_RTC_HCTOSYS_DEVICE extern int rtc_hctosys_ret; #else -- cgit v1.2.3 From 697e5a47aa12cdab6f2a8b284cc923cdf704eafc Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 6 Jul 2017 11:42:02 +0200 Subject: rtc: add generic nvmem support Many RTCs have an on board non volatile storage. It can be battery backed RAM or an EEPROM. Use the nvmem subsystem to export it to both userspace and in-kernel consumers. This stays compatible with the previous (non documented) ABI that was using /sys/class/rtc/rtcx/device/nvram to export that memory. But will warn about the deprecation. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 8e4a5f44f59e..d53ecdc060cf 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -14,6 +14,7 @@ #include #include +#include #include extern int rtc_month_days(unsigned int month, unsigned int year); @@ -144,6 +145,12 @@ struct rtc_device { bool registered; + struct nvmem_config *nvmem_config; + struct nvmem_device *nvmem; + /* Old ABI support */ + bool nvram_old_abi; + struct bin_attribute *nvram; + #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL struct work_struct uie_task; struct timer_list uie_timer; -- cgit v1.2.3 From 4a12f95177280a660bda99e81838919b1cc6a91a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 7 Jul 2017 10:51:38 +0200 Subject: KVM: mark kvm->busses as rcu protected mark kvm->busses as rcu protected and use the correct access function everywhere. found by sparse virt/kvm/kvm_main.c:3490:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3509:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3561:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3644:15: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bcd37b855c66..6a164f9eb02c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -404,7 +404,7 @@ struct kvm { int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; - struct kvm_io_bus *buses[KVM_NR_BUSES]; + struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_EVENTFD struct { spinlock_t lock; @@ -473,6 +473,12 @@ struct kvm { #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) +{ + return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, + lockdep_is_held(&kvm->slots_lock)); +} + static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case -- cgit v1.2.3 From a80cf7b5f4149753d5f19c872a47e66195b167d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 16:17:14 +0200 Subject: KVM: mark memory slots as rcu we access the memslots array via srcu. Mark it as such and use the right access functions also for the freeing of memory slots. Found by sparse: ./include/linux/kvm_host.h:565:16: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6a164f9eb02c..b3ca77a96b2d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -390,7 +390,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; /* -- cgit v1.2.3 From dcbbd97ccb9c6f4dad39875c1404d2643eaf110b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:44:59 +0200 Subject: libceph: remove ceph_sanitize_features() workaround Reflects ceph.git commit ff1959282826ae6acd7134e1b1ede74ffd1cc04a. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index fd8b2953c78f..4962708841b5 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -77,29 +77,8 @@ #define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ #define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ -/* - * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature - * vector to evaluate to 64 bit ~0. To cope, we designate 1ULL << 63 - * to mean 33 bit ~0, and introduce a helper below to do the - * translation. - * - * This was introduced by ceph.git commit - * 9ea02b84104045c2ffd7e7f4e7af512953855ecd v0.58-657-g9ea02b8 - * and fixed by ceph.git commit - * 4255b5c2fb54ae40c53284b3ab700fdfc7e61748 v0.65-263-g4255b5c - */ #define CEPH_FEATURE_RESERVED (1ULL<<63) -static inline u64 ceph_sanitize_features(u64 features) -{ - if (features & CEPH_FEATURE_RESERVED) { - /* everything through OSD_SNAPMAPPER */ - return 0x1ffffffffull; - } else { - return features; - } -} - /* * Features supported. */ -- cgit v1.2.3 From f179d3ba8cb9073c2d96315b79ff7bc658a1feee Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:44:59 +0200 Subject: libceph: new features macros Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 242 +++++++++++++++++++++++++------------ 1 file changed, 167 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4962708841b5..7fce9ed44af0 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -2,82 +2,174 @@ #define __CEPH_FEATURES /* - * feature bits + * Each time we reclaim bits for reuse we need to specify another bit + * that, if present, indicates we have the new incarnation of that + * feature. Base case is 1 (first use). */ -#define CEPH_FEATURE_UID (1ULL<<0) -#define CEPH_FEATURE_NOSRCADDR (1ULL<<1) -#define CEPH_FEATURE_MONCLOCKCHECK (1ULL<<2) -#define CEPH_FEATURE_FLOCK (1ULL<<3) -#define CEPH_FEATURE_SUBSCRIBE2 (1ULL<<4) -#define CEPH_FEATURE_MONNAMES (1ULL<<5) -#define CEPH_FEATURE_RECONNECT_SEQ (1ULL<<6) -#define CEPH_FEATURE_DIRLAYOUTHASH (1ULL<<7) -#define CEPH_FEATURE_OBJECTLOCATOR (1ULL<<8) -#define CEPH_FEATURE_PGID64 (1ULL<<9) -#define CEPH_FEATURE_INCSUBOSDMAP (1ULL<<10) -#define CEPH_FEATURE_PGPOOL3 (1ULL<<11) -#define CEPH_FEATURE_OSDREPLYMUX (1ULL<<12) -#define CEPH_FEATURE_OSDENC (1ULL<<13) -#define CEPH_FEATURE_OMAP (1ULL<<14) -#define CEPH_FEATURE_MONENC (1ULL<<15) -#define CEPH_FEATURE_QUERY_T (1ULL<<16) -#define CEPH_FEATURE_INDEP_PG_MAP (1ULL<<17) -#define CEPH_FEATURE_CRUSH_TUNABLES (1ULL<<18) -#define CEPH_FEATURE_CHUNKY_SCRUB (1ULL<<19) -#define CEPH_FEATURE_MON_NULLROUTE (1ULL<<20) -#define CEPH_FEATURE_MON_GV (1ULL<<21) -#define CEPH_FEATURE_BACKFILL_RESERVATION (1ULL<<22) -#define CEPH_FEATURE_MSG_AUTH (1ULL<<23) -#define CEPH_FEATURE_RECOVERY_RESERVATION (1ULL<<24) -#define CEPH_FEATURE_CRUSH_TUNABLES2 (1ULL<<25) -#define CEPH_FEATURE_CREATEPOOLID (1ULL<<26) -#define CEPH_FEATURE_REPLY_CREATE_INODE (1ULL<<27) -#define CEPH_FEATURE_OSD_HBMSGS (1ULL<<28) -#define CEPH_FEATURE_MDSENC (1ULL<<29) -#define CEPH_FEATURE_OSDHASHPSPOOL (1ULL<<30) -#define CEPH_FEATURE_MON_SINGLE_PAXOS (1ULL<<31) -#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32) -#define CEPH_FEATURE_MON_SCRUB (1ULL<<33) -#define CEPH_FEATURE_OSD_PACKED_RECOVERY (1ULL<<34) -#define CEPH_FEATURE_OSD_CACHEPOOL (1ULL<<35) -#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ -#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) -#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) -#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ -/* The process supports new-style OSDMap encoding. Monitors also use - this bit to determine if peers support NAK messages. */ -#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) -#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) -#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) -#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ -#define CEPH_FEATURE_MSGR_KEEPALIVE2 (1ULL<<42) -#define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43) -#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44) -#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45) -#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46) -#define CEPH_FEATURE_OSD_REPOP (1ULL<<46) /* overlap with fadvise */ -#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */ -#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */ -#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47) -#define CEPH_FEATURE_CRUSH_V4 (1ULL<<48) /* straw2 buckets */ -#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49) -// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY -#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49) /* overlap w/ above */ -#define CEPH_FEATURE_MON_METADATA (1ULL<<50) -#define CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT (1ULL<<51) /* can sort objs bitwise */ -#define CEPH_FEATURE_OSD_PROXY_WRITE_FEATURES (1ULL<<52) -#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V3 (1ULL<<53) -#define CEPH_FEATURE_OSD_HITSET_GMT (1ULL<<54) -#define CEPH_FEATURE_HAMMER_0_94_4 (1ULL<<55) -#define CEPH_FEATURE_NEW_OSDOP_ENCODING (1ULL<<56) /* New, v7 encoding */ -#define CEPH_FEATURE_MON_STATEFUL_SUB (1ULL<<57) /* stateful mon subscription */ -#define CEPH_FEATURE_MON_ROUTE_OSDMAP (1ULL<<57) /* peon sends osdmaps */ -#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */ -// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5 -#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */ -#define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */ - -#define CEPH_FEATURE_RESERVED (1ULL<<63) +#define CEPH_FEATURE_INCARNATION_1 (0ull) +#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL + +#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ + const static uint64_t CEPH_FEATURE_##name = (1ULL< Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: advertise support for OSD_POOLRESEND The code has been in place since commit 63244fa123a7 ("libceph: introduce ceph_osd_request_target, calc_target()"), and, with the ceph_{oloc,oid}_copy() issue fixed in the previous commit, is now in working order. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 7fce9ed44af0..89c68af48539 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -197,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ + CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) -- cgit v1.2.3 From 220abf5aa7ba5f544f1b589bde33761c60bbf9a0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: support SERVER_JEWEL feature bits Only MON_STATEFUL_SUB, really. MON_ROUTE_OSDMAP and OSDSUBOP_NO_SNAPCONTEXT are irrelevant. Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 89c68af48539..78a58770e6e9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -199,6 +199,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_SERVER_JEWEL | \ + CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING) -- cgit v1.2.3 From dc93e0e2831de2f80817b89aae4864b88332fcce Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2017 14:45:00 +0200 Subject: libceph: fold [l]req->last_force_resend into ceph_osd_request_target Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 85650b415e73..ef630ebd1169 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -148,6 +148,8 @@ struct ceph_osd_request_target { unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; + u32 last_force_resend; + int osd; }; @@ -193,7 +195,6 @@ struct ceph_osd_request { unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ int r_attempts; - u32 r_last_force_resend; u32 r_map_dne_bound; struct ceph_osd_req_op r_ops[]; @@ -221,7 +222,6 @@ struct ceph_osd_linger_request { struct list_head pending_lworks; struct ceph_osd_request_target t; - u32 last_force_resend; u32 map_dne_bound; struct timespec mtime; -- cgit v1.2.3 From dc98ff7230e5ccf11c621dff0d590e83574a7184 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:53 +0200 Subject: libceph: introduce ceph_spg, ceph_pg_to_primary_shard() Store both raw pgid and actual spgid in ceph_osd_request_target. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- include/linux/ceph/osdmap.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index ef630ebd1169..6114f7b02135 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -136,7 +136,8 @@ struct ceph_osd_request_target { struct ceph_object_id target_oid; struct ceph_object_locator target_oloc; - struct ceph_pg pgid; + struct ceph_pg pgid; /* last raw pg we mapped to */ + struct ceph_spg spgid; /* last actual spg we mapped to */ u32 pg_num; u32 pg_num_mask; struct ceph_osds acting; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 938656f70807..7ae5b416b4b6 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -24,6 +24,13 @@ struct ceph_pg { uint32_t seed; }; +#define CEPH_SPG_NOSHARD -1 + +struct ceph_spg { + struct ceph_pg pgid; + s8 shard; +}; + int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id @@ -271,6 +278,9 @@ void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, const struct ceph_pg *raw_pgid, struct ceph_osds *up, struct ceph_osds *acting); +bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap, + const struct ceph_pg *raw_pgid, + struct ceph_spg *spgid); int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, const struct ceph_pg *raw_pgid); -- cgit v1.2.3 From 98ad5ebd1505eb903ae8bc27e94c1ab0d1c3e651 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: ceph_connection_operations::reencode_message() method Give upper layers a chance to reencode the message after the connection is negotiated and ->peer_features is set. OSD client will use this to support both luminous and pre-luminous OSDs (in a single cluster): the former need MOSDOp v8; the latter will continue to be sent MOSDOp v4. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c5c4c713e00f..fbd94d9fa5dd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -44,6 +44,8 @@ struct ceph_connection_operations { struct ceph_msg_header *hdr, int *skip); + void (*reencode_message) (struct ceph_msg *msg); + int (*sign_message) (struct ceph_msg *msg); int (*check_message_signature) (struct ceph_msg *msg); }; -- cgit v1.2.3 From 8cb441c0545dfd4dafeedc1e2d7157e1072413ac Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: MOSDOp v8 encoding (actual spgid + full hash) Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 17 +++++++++++++++++ include/linux/ceph/osdmap.h | 4 +++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6114f7b02135..bca2718ac253 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -205,6 +205,23 @@ struct ceph_request_redirect { struct ceph_object_locator oloc; }; +/* + * osd request identifier + * + * caller name + incarnation# + tid to unique identify this request + */ +struct ceph_osd_reqid { + struct ceph_entity_name name; + __le64 tid; + __le32 inc; +} __packed; + +struct ceph_blkin_trace_info { + __le64 trace_id; + __le64 span_id; + __le64 parent_span_id; +} __packed; + typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, u64 notifier_id, void *data, size_t data_len); typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 7ae5b416b4b6..66447fc7f334 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -205,11 +205,13 @@ static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, return &map->osd_addr[osd]; } +#define CEPH_PGID_ENCODING_LEN (1 + 8 + 4 + 4) + static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) { __u8 version; - if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) { + if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) { pr_warn("incomplete pg encoding\n"); return -EINVAL; } -- cgit v1.2.3 From 7de030d6b10a56e991312a978ace6be3c090097c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:54 +0200 Subject: libceph: resend on PG splits if OSD has RESEND_ON_SPLIT Note that ceph_osd_request_target fields are updated regardless of RESEND_ON_SPLIT. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 66447fc7f334..63fb073a3355 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -249,6 +249,8 @@ static inline void ceph_osds_init(struct ceph_osds *set) void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src); +bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num, + u32 new_pg_num); bool ceph_is_new_interval(const struct ceph_osds *old_acting, const struct ceph_osds *new_acting, const struct ceph_osds *old_up, -- cgit v1.2.3 From 04c7d789e269c2b82bbd08106049a5a979cdb3fd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:55 +0200 Subject: libceph: make sure need_resend targets reflect latest map Otherwise we may miss events like PG splits, pool deletions, etc when we get multiple incremental maps at once. Because check_pool_dne() can now be fed an unlinked request, finish_request() needed to be taught to handle unlinked requests. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bca2718ac253..62c672bcbb31 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -149,6 +149,7 @@ struct ceph_osd_request_target { unsigned int flags; /* CEPH_OSD_FLAG_* */ bool paused; + u32 epoch; u32 last_force_resend; int osd; -- cgit v1.2.3 From df28152d53b449a72258000f592472215fc9371e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 15 Jun 2017 16:30:56 +0200 Subject: libceph: avoid unnecessary pi lookups in calc_target() Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 63fb073a3355..060d059acbf8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -273,16 +273,22 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); +int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi, + const struct ceph_object_id *oid, + const struct ceph_object_locator *oloc, + struct ceph_pg *raw_pgid); int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap, - struct ceph_object_id *oid, - struct ceph_object_locator *oloc, + const struct ceph_object_id *oid, + const struct ceph_object_locator *oloc, struct ceph_pg *raw_pgid); void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pi, const struct ceph_pg *raw_pgid, struct ceph_osds *up, struct ceph_osds *acting); bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap, + struct ceph_pg_pool_info *pi, const struct ceph_pg *raw_pgid, struct ceph_spg *spgid); int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap, -- cgit v1.2.3 From 76f827a7b1faaaebc53f89d184e95ea3a0b8dd71 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jun 2017 12:18:05 +0200 Subject: libceph: make DEFINE_RB_* helpers more general Initially for ceph_pg_mapping, ceph_spg_mapping and ceph_hobject_id, compared with ceph_pg_compare(), ceph_spg_compare() and hoid_compare() respectively. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 49 +++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 3229ae6c7846..8a79587e1317 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -184,10 +184,11 @@ static inline int calc_pages_for(u64 off, u64 len) (off >> PAGE_SHIFT); } -/* - * These are not meant to be generic - an integer key is assumed. - */ -#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +#define RB_BYVAL(a) (a) +#define RB_BYPTR(a) (&(a)) +#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b)) + +#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \ static void insert_##name(struct rb_root *root, type *t) \ { \ struct rb_node **n = &root->rb_node; \ @@ -197,11 +198,13 @@ static void insert_##name(struct rb_root *root, type *t) \ \ while (*n) { \ type *cur = rb_entry(*n, type, nodefld); \ + int cmp; \ \ parent = *n; \ - if (t->keyfld < cur->keyfld) \ + cmp = cmpexp(keyexp(t->keyfld), keyexp(cur->keyfld)); \ + if (cmp < 0) \ n = &(*n)->rb_left; \ - else if (t->keyfld > cur->keyfld) \ + else if (cmp > 0) \ n = &(*n)->rb_right; \ else \ BUG(); \ @@ -217,19 +220,24 @@ static void erase_##name(struct rb_root *root, type *t) \ RB_CLEAR_NODE(&t->nodefld); \ } -#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ -extern type __lookup_##name##_key; \ -static type *lookup_##name(struct rb_root *root, \ - typeof(__lookup_##name##_key.keyfld) key) \ +/* + * @lookup_param_type is a parameter and not constructed from (@type, + * @keyfld) with typeof() because adding const is too unwieldy. + */ +#define DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) \ +static type *lookup_##name(struct rb_root *root, lookup_param_type key) \ { \ struct rb_node *n = root->rb_node; \ \ while (n) { \ type *cur = rb_entry(n, type, nodefld); \ + int cmp; \ \ - if (key < cur->keyfld) \ + cmp = cmpexp(key, keyexp(cur->keyfld)); \ + if (cmp < 0) \ n = n->rb_left; \ - else if (key > cur->keyfld) \ + else if (cmp > 0) \ n = n->rb_right; \ else \ return cur; \ @@ -238,6 +246,23 @@ static type *lookup_##name(struct rb_root *root, \ return NULL; \ } +#define DEFINE_RB_FUNCS2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) \ +DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \ +DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, cmpexp, keyexp, \ + lookup_param_type, nodefld) + +/* + * Shorthands for integer keys. + */ +#define DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ +DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, nodefld) + +#define DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) \ +extern type __lookup_##name##_key; \ +DEFINE_RB_LOOKUP_FUNC2(name, type, keyfld, RB_CMP3WAY, RB_BYVAL, \ + typeof(__lookup_##name##_key.keyfld), nodefld) + #define DEFINE_RB_FUNCS(name, type, keyfld, nodefld) \ DEFINE_RB_INSDEL_FUNCS(name, type, keyfld, nodefld) \ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) -- cgit v1.2.3 From a02a946dfe9633d7e0202359836f6b5217a62824 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jun 2017 12:18:05 +0200 Subject: libceph: respect RADOS_BACKOFF backoffs Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + include/linux/ceph/osd_client.h | 45 +++++++++++++++++++++++++++++++++++++++++ include/linux/ceph/osdmap.h | 1 + include/linux/ceph/rados.h | 6 ++++++ 4 files changed, 53 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ad078ebe25d6..edf5b04b918a 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -147,6 +147,7 @@ struct ceph_dir_layout { #define CEPH_MSG_OSD_OP 42 #define CEPH_MSG_OSD_OPREPLY 43 #define CEPH_MSG_WATCH_NOTIFY 44 +#define CEPH_MSG_OSD_BACKOFF 61 /* watch-notify operations */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 62c672bcbb31..c6d96a5f46fd 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -1,6 +1,7 @@ #ifndef _FS_CEPH_OSD_CLIENT_H #define _FS_CEPH_OSD_CLIENT_H +#include #include #include #include @@ -36,6 +37,8 @@ struct ceph_osd { struct ceph_connection o_con; struct rb_root o_requests; struct rb_root o_linger_requests; + struct rb_root o_backoff_mappings; + struct rb_root o_backoffs_by_id; struct list_head o_osd_lru; struct ceph_auth_handshake o_auth; unsigned long lru_ttl; @@ -275,6 +278,48 @@ struct ceph_watch_item { struct ceph_entity_addr addr; }; +struct ceph_spg_mapping { + struct rb_node node; + struct ceph_spg spgid; + + struct rb_root backoffs; +}; + +struct ceph_hobject_id { + void *key; + size_t key_len; + void *oid; + size_t oid_len; + u64 snapid; + u32 hash; + u8 is_max; + void *nspace; + size_t nspace_len; + s64 pool; + + /* cache */ + u32 hash_reverse_bits; +}; + +static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) +{ + hoid->hash_reverse_bits = bitrev32(hoid->hash); +} + +/* + * PG-wide backoff: [begin, end) + * per-object backoff: begin == end + */ +struct ceph_osd_backoff { + struct rb_node spg_node; + struct rb_node id_node; + + struct ceph_spg spgid; + u64 id; + struct ceph_hobject_id *begin; + struct ceph_hobject_id *end; +}; + #define CEPH_LINGER_ID_START 0xffff000000000000ULL struct ceph_osd_client { diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 060d059acbf8..fe6d189bdd30 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -32,6 +32,7 @@ struct ceph_spg { }; int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs); +int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs); #define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id together */ diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5d0018782d50..385db08bb8b2 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -439,6 +439,12 @@ enum { const char *ceph_osd_watch_op_name(int o); +enum { + CEPH_OSD_BACKOFF_OP_BLOCK = 1, + CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2, + CEPH_OSD_BACKOFF_OP_UNBLOCK = 3, +}; + /* * an individual object operation. each may be accompanied by some data * payload -- cgit v1.2.3 From 278b1d709c6acc6f7d138fed775c76695b068e43 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jun 2017 17:27:17 +0200 Subject: libceph: ceph_decode_skip_* helpers Some of these won't be as efficient as they could be (e.g. ceph_decode_skip_set(... 32 ...) could advance by len * sizeof(u32) once instead of advancing by sizeof(u32) len times), but that's fine and not worth a bunch of extra macro code. Replace skip_name_map() with ceph_decode_skip_map as an example. Signed-off-by: Ilya Dryomov --- include/linux/ceph/decode.h | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index f990f2cc907a..14af9b70d301 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -132,6 +132,66 @@ bad: return ERR_PTR(-ERANGE); } +/* + * skip helpers + */ +#define ceph_decode_skip_n(p, end, n, bad) \ + do { \ + ceph_decode_need(p, end, n, bad); \ + *p += n; \ + } while (0) + +#define ceph_decode_skip_64(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u64), bad) + +#define ceph_decode_skip_32(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u32), bad) + +#define ceph_decode_skip_16(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u16), bad) + +#define ceph_decode_skip_8(p, end, bad) \ +ceph_decode_skip_n(p, end, sizeof(u8), bad) + +#define ceph_decode_skip_string(p, end, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + ceph_decode_skip_n(p, end, len, bad); \ + } while (0) + +#define ceph_decode_skip_set(p, end, type, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) \ + ceph_decode_skip_##type(p, end, bad); \ + } while (0) + +#define ceph_decode_skip_map(p, end, ktype, vtype, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) { \ + ceph_decode_skip_##ktype(p, end, bad); \ + ceph_decode_skip_##vtype(p, end, bad); \ + } \ + } while (0) + +#define ceph_decode_skip_map_of_map(p, end, ktype1, ktype2, vtype2, bad) \ + do { \ + u32 len; \ + \ + ceph_decode_32_safe(p, end, len, bad); \ + while (len--) { \ + ceph_decode_skip_##ktype1(p, end, bad); \ + ceph_decode_skip_map(p, end, ktype2, vtype2, bad); \ + } \ + } while (0) + /* * struct ceph_timespec <-> struct timespec */ -- cgit v1.2.3 From 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 21 Jun 2017 17:27:18 +0200 Subject: libceph: pg_upmap[_items] infrastructure pg_temp and pg_upmap encodings are the same (PG -> array of osds), except for the incremental remove: it's an empty mapping in new_pg_temp for pg_temp and a separate old_pg_upmap set for pg_upmap. (This isn't to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't looked at as an example for pg_upmap encoding.) Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap. __decode_pg_temp() stores into pg_temp union member, but since pg_upmap union member is identical, reading through pg_upmap later is OK. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index fe6d189bdd30..c612cff81f5c 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -143,10 +143,14 @@ struct ceph_pg_mapping { struct { int len; int osds[]; - } pg_temp; + } pg_temp, pg_upmap; struct { int osd; } primary_temp; + struct { + int len; + int from_to[][2]; + } pg_upmap_items; }; }; @@ -165,6 +169,10 @@ struct ceph_osdmap { struct rb_root pg_temp; struct rb_root primary_temp; + /* remap (post-CRUSH, pre-up) */ + struct rb_root pg_upmap; /* PG := raw set */ + struct rb_root pg_upmap_items; /* from -> to within raw set */ + u32 *osd_primary_affinity; struct rb_root pg_pools; -- cgit v1.2.3 From 069f3222ca96acfe8c59937e98c401bda5475b48 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:05 +0200 Subject: crush: implement weight and id overrides for straw2 bucket_straw2_choose needs to use weights that may be different from weight_items. For instance to compensate for an uneven distribution caused by a low number of values. Or to fix the probability biais introduced by conditional probabilities (see http://tracker.ceph.com/issues/15653 for more information). We introduce a weight_set for each straw2 bucket to set the desired weight for a given item at a given position. The weight of a given item when picking the first replica (first position) may be different from the weight the second replica (second position). For instance the weight matrix for a given bucket containing items 3, 7 and 13 could be as follows: position 0 position 1 item 3 0x10000 0x100000 item 7 0x40000 0x10000 item 13 0x40000 0x10000 When crush_do_rule picks the first of two replicas (position 0), item 7, 3 are four times more likely to be choosen by bucket_straw2_choose than item 13. When choosing the second replica (position 1), item 3 is ten times more likely to be choosen than item 7, 13. By default the weight_set of each bucket exactly matches the content of item_weights for each position to ensure backward compatibility. bucket_straw2_choose compares items by using their id. The same ids are also used to index buckets and they must be unique. For each item in a bucket an array of ids can be provided for placement purposes and they are used instead of the ids. If no replacement ids are provided, the legacy behavior is preserved. Reflects ceph.git commit 19537a450fd5c5a0bb8b7830947507a76db2ceca. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 58 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/crush/mapper.h | 9 +++---- 2 files changed, 62 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index fbecbd089d75..d8676e56fa23 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -137,6 +137,64 @@ struct crush_bucket { }; +/** @ingroup API + * + * Replacement weights for each item in a bucket. The size of the + * array must be exactly the size of the straw2 bucket, just as the + * item_weights array. + * + */ +struct crush_weight_set { + __u32 *weights; /*!< 16.16 fixed point weights + in the same order as items */ + __u32 size; /*!< size of the __weights__ array */ +}; + +/** @ingroup API + * + * Replacement weights and ids for a given straw2 bucket, for + * placement purposes. + * + * When crush_do_rule() chooses the Nth item from a straw2 bucket, the + * replacement weights found at __weight_set[N]__ are used instead of + * the weights from __item_weights__. If __N__ is greater than + * __weight_set_size__, the weights found at __weight_set_size-1__ are + * used instead. For instance if __weight_set__ is: + * + * [ [ 0x10000, 0x20000 ], // position 0 + * [ 0x20000, 0x40000 ] ] // position 1 + * + * choosing the 0th item will use position 0 weights [ 0x10000, 0x20000 ] + * choosing the 1th item will use position 1 weights [ 0x20000, 0x40000 ] + * choosing the 2th item will use position 1 weights [ 0x20000, 0x40000 ] + * etc. + * + */ +struct crush_choose_arg { + __s32 *ids; /*!< values to use instead of items */ + __u32 ids_size; /*!< size of the __ids__ array */ + struct crush_weight_set *weight_set; /*!< weight replacements for + a given position */ + __u32 weight_set_size; /*!< size of the __weight_set__ array */ +}; + +/** @ingroup API + * + * Replacement weights and ids for each bucket in the crushmap. The + * __size__ of the __args__ array must be exactly the same as the + * __map->max_buckets__. + * + * The __crush_choose_arg__ at index N will be used when choosing + * an item from the bucket __map->buckets[N]__ bucket, provided it + * is a straw2 bucket. + * + */ +struct crush_choose_arg_map { + struct crush_choose_arg *args; /*!< replacement for each bucket + in the crushmap */ + __u32 size; /*!< size of the __args__ array */ +}; + struct crush_bucket_uniform { struct crush_bucket h; __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index c95e19e1ff11..141edabb947e 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -11,11 +11,10 @@ #include "crush.h" extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size); -extern int crush_do_rule(const struct crush_map *map, - int ruleno, - int x, int *result, int result_max, - const __u32 *weights, int weight_max, - void *cwin); +int crush_do_rule(const struct crush_map *map, + int ruleno, int x, int *result, int result_max, + const __u32 *weight, int weight_max, + void *cwin, const struct crush_choose_arg *choose_args); /* * Returns the exact amount of workspace that will need to be used -- cgit v1.2.3 From 5cf9c4a9959b6273675310d14a834ef14fbca37c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:05 +0200 Subject: libceph, crush: per-pool crush_choose_arg_map for crush_do_rule() If there is no crush_choose_arg_map for a given pool, a NULL pointer is passed to preserve existing crush_do_rule() behavior. Reflects ceph.git commits 55fb91d64071552ea1bc65ab4ea84d3c8b73ab4b, dbe36e08be00c6519a8c89718dd47b0219c20516. Signed-off-by: Ilya Dryomov --- include/linux/crush/crush.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index d8676e56fa23..92e165d417a6 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -2,6 +2,7 @@ #define CEPH_CRUSH_CRUSH_H #ifdef __KERNEL__ +# include # include #else # include "crush_compat.h" @@ -190,6 +191,10 @@ struct crush_choose_arg { * */ struct crush_choose_arg_map { +#ifdef __KERNEL__ + struct rb_node node; + u64 choose_args_index; +#endif struct crush_choose_arg *args; /*!< replacement for each bucket in the crushmap */ __u32 size; /*!< size of the __args__ array */ @@ -294,6 +299,9 @@ struct crush_map { __u32 allowed_bucket_algs; __u32 *choose_tries; +#else + /* CrushWrapper::choose_args */ + struct rb_root choose_args; #endif }; -- cgit v1.2.3 From 0bb05da2ec57163b7a25efef001ed8f52b18b070 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 22 Jun 2017 19:44:06 +0200 Subject: libceph: osd_state is 32 bits wide in luminous Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index c612cff81f5c..a0996cb9faed 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -162,7 +162,7 @@ struct ceph_osdmap { u32 flags; /* CEPH_OSDMAP_* */ u32 max_osd; /* size of osd_state, _offload, _addr arrays */ - u8 *osd_state; /* CEPH_OSD_* */ + u32 *osd_state; /* CEPH_OSD_* */ u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */ struct ceph_entity_addr *osd_addr; @@ -203,7 +203,7 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) return !ceph_osd_is_up(map, osd); } -extern char *ceph_osdmap_state_str(char *str, int len, int state); +char *ceph_osdmap_state_str(char *str, int len, u32 state); extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, -- cgit v1.2.3 From 33e9c8dbfbcef8e4cda8e43a445e692ab7e0d8c0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Jun 2017 12:05:55 +0200 Subject: libceph: advertise support for NEW_OSDOP_ENCODING and SERVER_LUMINOUS All four SERVER_LUMINOUS feature bits are implemented, switch it on! NEW_OSDOP_ENCODING doesn't mean much for the client (it signifies support for MOSDOp v6) but needs to be enabled in order to get the latest (currently v25) pg_pool_t. Signed-off-by: Ilya Dryomov Acked-by: Sage Weil --- include/linux/ceph/ceph_features.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 78a58770e6e9..f0f6c537b64c 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -184,6 +184,11 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_SERVER_LUMINOUS | \ + CEPH_FEATURE_RESEND_ON_SPLIT | \ + CEPH_FEATURE_RADOS_BACKOFF | \ + CEPH_FEATURE_OSDMAP_PG_UPMAP | \ + CEPH_FEATURE_CRUSH_CHOOSE_ARGS | \ CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ @@ -199,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_CRUSH_V4 | \ + CEPH_FEATURE_NEW_OSDOP_ENCODING | \ CEPH_FEATURE_SERVER_JEWEL | \ CEPH_FEATURE_MON_STATEFUL_SUB | \ CEPH_FEATURE_CRUSH_TUNABLES5 | \ -- cgit v1.2.3 From 8f1a357d41a22009150cf404b5aa5876efdb59b1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Jul 2017 20:26:17 +0300 Subject: i2c: Provide a stub for i2c_detect_slave_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers would like to call i2c_detect_slave_mode() even if !I2C_SLAVE. Give them what they want to, Otherwise kernel will not compile: drivers/i2c/busses/i2c-designware-platdrv.c: In function ‘dw_i2c_plat_probe’: drivers/i2c/busses/i2c-designware-platdrv.c:331:6: error: implicit declaration of function ‘i2c_detect_slave_mode’ [-Werror=implicit-function-declaration] if (i2c_detect_slave_mode(&pdev->dev)) ^~~~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors Fixes: 6e38cf3b4421 ("i2c: designware: Let slave adapter support be optional") Reported-by: Abdul Haleem Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 72d0ece70ed3..00ca5b86a753 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -295,6 +295,8 @@ static inline int i2c_slave_event(struct i2c_client *client, { return client->slave_cb(client, event, val); } +#else +static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } #endif /** -- cgit v1.2.3 From d1438ad8f3eec7207618b8e01f9f3eec7b6f67c4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 7 Jul 2017 18:08:25 -0700 Subject: nvme_fc/nvmet_fc: revise Create Association descriptor length Revises the Create Association LS for the amount of pad expected in 1.16. Add defines for the minimum lengths that a target can accept (e.g. variable pad lengths) Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/nvme-fc.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index bc711a10be05..21c37e39e41a 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -17,6 +17,7 @@ /* * This file contains definitions relative to FC-NVME r1.14 (16-020vB). + * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content. */ #ifndef _NVME_FC_H @@ -193,9 +194,21 @@ struct fcnvme_lsdesc_cr_assoc_cmd { uuid_t hostid; u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; - u8 rsvd632[384]; + __be32 rsvd584[108]; /* pad to 1016 bytes, + * which makes overall LS rqst + * payload 1024 bytes + */ }; +#define FCNVME_LSDESC_CRA_CMD_DESC_MINLEN \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, rsvd584) + +#define FCNVME_LSDESC_CRA_CMD_DESC_MIN_DESCLEN \ + (FCNVME_LSDESC_CRA_CMD_DESC_MINLEN - \ + offsetof(struct fcnvme_lsdesc_cr_assoc_cmd, ersp_ratio)) + + + /* FCNVME_LSDESC_CREATE_CONN_CMD */ struct fcnvme_lsdesc_cr_conn_cmd { __be32 desc_tag; /* FCNVME_LSDESC_xxx */ @@ -273,6 +286,14 @@ struct fcnvme_ls_cr_assoc_rqst { struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd; }; +#define FCNVME_LSDESC_CRA_RQST_MINLEN \ + (offsetof(struct fcnvme_ls_cr_assoc_rqst, assoc_cmd) + \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN) + +#define FCNVME_LSDESC_CRA_RQST_MIN_LISTLEN \ + FCNVME_LSDESC_CRA_CMD_DESC_MINLEN + + struct fcnvme_ls_cr_assoc_acc { struct fcnvme_ls_acc_hdr hdr; struct fcnvme_lsdesc_assoc_id associd; -- cgit v1.2.3 From 7e988b103d0d52190244517edc76e649071284bb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 7 Jul 2017 15:49:00 +0200 Subject: KVM: use correct accessor function for __kvm_memslots kvm memslots are protected by srcu and not by rcu. We must use srcu_dereference_check instead of rcu_dereference_check. Signed-off-by: Christian Borntraeger Suggested-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b3ca77a96b2d..648b34cabb38 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -568,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - return rcu_dereference_check(kvm->memslots[as_id], - srcu_read_lock_held(&kvm->srcu) - || lockdep_is_held(&kvm->slots_lock)); + return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, + lockdep_is_held(&kvm->slots_lock)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) -- cgit v1.2.3 From c43aeb198048f64abda8655fdcdebe71cf1877ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Jul 2017 07:40:49 -0400 Subject: fix brown paperbag bug in inlined copy_..._iter() "copied nothing" == "return 0", not "return full size". Fixes: aa28de275a24 "iov_iter/hardening: move object size checks to inlined part" Spotted-by: Arnd Bergmann Signed-off-by: Al Viro --- include/linux/uio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 342d2dc225b9..8a642cda641c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -103,7 +103,7 @@ static __always_inline __must_check size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, true))) - return bytes; + return 0; else return _copy_to_iter(addr, bytes, i); } @@ -112,7 +112,7 @@ static __always_inline __must_check size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter(addr, bytes, i); } @@ -130,7 +130,7 @@ static __always_inline __must_check size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter_nocache(addr, bytes, i); } @@ -160,7 +160,7 @@ static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(!check_copy_size(addr, bytes, false))) - return bytes; + return 0; else return _copy_from_iter_flushcache(addr, bytes, i); } -- cgit v1.2.3 From 42a6e0996084972574e0a2b23e7326b78b0f64c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jul 2017 13:22:50 +0200 Subject: nvmem: include linux/err.h from header The new support for nvmem devices from the rtc layer caused a build error in some configurations: include/linux/nvmem-provider.h: In function 'nvmem_register': include/linux/nvmem-provider.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration] This adds the missing include to ensure we can always include the header. Fixes: 697e5a47aa12 ("rtc: add generic nvmem support") Signed-off-by: Arnd Bergmann Acked-by: Srinivas Kandagatla Signed-off-by: Alexandre Belloni --- include/linux/nvmem-provider.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index cd93416d762e..497706f5adca 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -12,6 +12,9 @@ #ifndef _LINUX_NVMEM_PROVIDER_H #define _LINUX_NVMEM_PROVIDER_H +#include +#include + struct nvmem_device; struct nvmem_cell_info; typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, -- cgit v1.2.3 From 7cee9384cb3e25de33d75ecdbf08bb15b4ea9fa5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 10 Jul 2017 11:40:19 -0700 Subject: Fix up over-eager 'wait_queue_t' renaming Commit ac6424b981bc ("sched/wait: Rename wait_queue_t => wait_queue_entry_t") had scripted the renaming incorrectly, and didn't actually check that the 'wait_queue_t' was a full token. As a result, it also triggered on 'wait_queue_token', and renamed that to 'wait_queue_entry_token' entry in the autofs4 packet structure definition too. That was entirely incorrect, and not intended. The end result built fine when building just the kernel - because everything had been renamed consistently there - but caused problems in user space because the "struct autofs_packet_missing" type is exported as part of the uapi. This scripts it all back again: git grep -lw wait_queue_entry_token | xargs sed -i 's/wait_queue_entry_token/wait_queue_token/g' and checks the end result. Reported-by: Florian Fainelli Acked-by: Ingo Molnar Fixes: ac6424b981bc ("sched/wait: Rename wait_queue_t => wait_queue_entry_t") Signed-off-by: Linus Torvalds --- include/uapi/linux/auto_fs.h | 4 ++-- include/uapi/linux/auto_fs4.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 1953f8d6063b..aa63451ef20a 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -26,7 +26,7 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * The wait_queue_entry_token (autofs_wqt_t) is part of a structure which is passed + * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed * back to the kernel via ioctl from userspace. On architectures where 32- and * 64-bit userspace binaries can be executed it's important that the size of * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we @@ -49,7 +49,7 @@ struct autofs_packet_hdr { struct autofs_packet_missing { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_entry_token; + autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 65b72d0222e7..7c6da423d54e 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -108,7 +108,7 @@ enum autofs_notify { /* v4 multi expire (via pipe) */ struct autofs_packet_expire_multi { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_entry_token; + autofs_wqt_t wait_queue_token; int len; char name[NAME_MAX+1]; }; @@ -123,7 +123,7 @@ union autofs_packet_union { /* autofs v5 common packet struct */ struct autofs_v5_packet { struct autofs_packet_hdr hdr; - autofs_wqt_t wait_queue_entry_token; + autofs_wqt_t wait_queue_token; __u32 dev; __u64 ino; __u32 uid; -- cgit v1.2.3 From 23955622ff8d231bcc9650b3d06583f117a6e3ba Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 10 Jul 2017 15:47:11 -0700 Subject: swap: add block io poll in swapin path For fast flash disk, async IO could introduce overhead because of context switch. block-mq now supports IO poll, which improves performance and latency a lot. swapin is a good place to use this technique, because the task is waiting for the swapin page to continue execution. In my virtual machine, directly read 4k data from a NVMe with iopoll is about 60% better than that without poll. With iopoll support in swapin patch, my microbenchmark (a task does random memory write) is about 10%~25% faster. CPU utilization increases a lot though, 2x and even 3x CPU utilization. This will depend on disk speed. While iopoll in swapin isn't intended for all usage cases, it's a win for latency sensistive workloads with high speed swap disk. block layer has knob to control poll in runtime. If poll isn't enabled in block layer, there should be no noticeable change in swapin. I got a chance to run the same test in a NVMe with DRAM as the media. In simple fio IO test, blkpoll boosts 50% performance in single thread test and ~20% in 8 threads test. So this is the base line. In above swap test, blkpoll boosts ~27% performance in single thread test. blkpoll uses 2x CPU time though. If we enable hybid polling, the performance gain has very slight drop but CPU time is only 50% worse than that without blkpoll. Also we can adjust parameter of hybid poll, with it, the CPU time penality is reduced further. In 8 threads test, blkpoll doesn't help though. The performance is similar to that without blkpoll, but cpu utilization is similar too. There is lock contention in swap path. The cpu time spending on blkpoll isn't high. So overall, blkpoll swapin isn't worse than that without it. The swapin readahead might read several pages in in the same time and form a big IO request. Since the IO will take longer time, it doesn't make sense to do poll, so the patch only does iopoll for single page swapin. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/070c3c3e40b711e7b1390002c991e86a-b5408f0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c Signed-off-by: Shaohua Li Cc: Tim Chen Cc: Huang Ying Cc: Jens Axboe Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 5ab1c98c7d27..61e7180cee21 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -331,7 +331,7 @@ extern void kswapd_stop(int nid); #include /* for bio_end_io_t */ /* linux/mm/page_io.c */ -extern int swap_readpage(struct page *); +extern int swap_readpage(struct page *page, bool do_poll); extern int swap_writepage(struct page *page, struct writeback_control *wbc); extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, @@ -362,7 +362,8 @@ extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, - struct vm_area_struct *vma, unsigned long addr); + struct vm_area_struct *vma, unsigned long addr, + bool do_poll); extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool *new_page_allocated); -- cgit v1.2.3 From b37ff71cc626a0c1b5e098ff9a0b723815f6aaeb Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:38 -0700 Subject: mm: hwpoison: change PageHWPoison behavior on hugetlb pages We'd like to narrow down the error region in memory error on hugetlb pages. However, currently we set PageHWPoison flags on all subpages in the error hugepage and add # of subpages to num_hwpoison_pages, which doesn't fit our purpose. So this patch changes the behavior and we only set PageHWPoison on the head page then increase num_hwpoison_pages only by 1. This is a preparation for narrow-down part which comes in later patches. Link: http://lkml.kernel.org/r/1496305019-5493-4-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 5c3a5f3e7eec..c5ff7b217ee6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -196,15 +196,6 @@ static inline void num_poisoned_pages_dec(void) atomic_long_dec(&num_poisoned_pages); } -static inline void num_poisoned_pages_add(long num) -{ - atomic_long_add(num, &num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long num) -{ - atomic_long_sub(num, &num_poisoned_pages); -} #else static inline swp_entry_t make_hwpoison_entry(struct page *page) -- cgit v1.2.3 From c3114a84f7f96c9d5c73c8bfa7e21ff42fda97e2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 10 Jul 2017 15:47:41 -0700 Subject: mm: hugetlb: soft-offline: dissolve source hugepage after successful migration Currently hugepage migrated by soft-offline (i.e. due to correctable memory errors) is contained as a hugepage, which means many non-error pages in it are unreusable, i.e. wasted. This patch solves this issue by dissolving source hugepages into buddy. As done in previous patch, PageHWPoison is set only on a head page of the error hugepage. Then in dissoliving we move the PageHWPoison flag to the raw error page so that all healthy subpages return back to buddy. [arnd@arndb.de: fix warnings: replace some macros with inline functions] Link: http://lkml.kernel.org/r/20170609102544.2947326-1-arnd@arndb.de Link: http://lkml.kernel.org/r/1496305019-5493-5-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Anshuman Khandual Signed-off-by: Naoya Horiguchi Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 46bfb702e7d6..668ab1742ef6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -472,6 +472,7 @@ static inline pgoff_t basepage_index(struct page *page) return __basepage_index(page); } +extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); static inline bool hugepage_migration_supported(struct hstate *h) @@ -550,15 +551,37 @@ static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1; } -#define hstate_index_to_shift(index) 0 -#define hstate_index(h) 0 + +static inline unsigned hstate_index_to_shift(unsigned index) +{ + return 0; +} + +static inline int hstate_index(struct hstate *h) +{ + return 0; +} static inline pgoff_t basepage_index(struct page *page) { return page->index; } -#define dissolve_free_huge_pages(s, e) 0 -#define hugepage_migration_supported(h) false + +static inline int dissolve_free_huge_page(struct page *page) +{ + return 0; +} + +static inline int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline bool hugepage_migration_supported(struct hstate *h) +{ + return false; +} static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) -- cgit v1.2.3 From ddd40d8a2c4ef8f2152ea6d227e11475cf7e5bfa Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Mon, 10 Jul 2017 15:47:53 -0700 Subject: mm: hugetlb: delete dequeue_hwpoisoned_huge_page() dequeue_hwpoisoned_huge_page() is no longer used, so let's remove it. Link: http://lkml.kernel.org/r/1496305019-5493-9-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 668ab1742ef6..57f700ac127e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -116,7 +116,6 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); void free_huge_page(struct page *page); @@ -192,10 +191,6 @@ static inline void hugetlb_show_meminfo(void) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address, sz) 0 -static inline int dequeue_hwpoisoned_huge_page(struct page *page) -{ - return 0; -} static inline bool isolate_huge_page(struct page *page, struct list_head *list) { -- cgit v1.2.3 From 1860033237d4be09c5d7382585f0c7229367a534 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:02 -0700 Subject: mm: make PR_SET_THP_DISABLE immediately active PR_SET_THP_DISABLE has a rather subtle semantic. It doesn't affect any existing mapping because it only updated mm->def_flags which is a template for new mappings. The mappings created after prctl(PR_SET_THP_DISABLE) have VM_NOHUGEPAGE flag set. This can be quite surprising for all those applications which do not do prctl(); fork() & exec() and want to control their own THP behavior. Another usecase when the immediate semantic of the prctl might be useful is a combination of pre- and post-copy migration of containers with CRIU. In this case CRIU populates a part of a memory region with data that was saved during the pre-copy stage. Afterwards, the region is registered with userfaultfd and CRIU expects to get page faults for the parts of the region that were not yet populated. However, khugepaged collapses the pages and the expected page faults do not occur. In more general case, the prctl(PR_SET_THP_DISABLE) could be used as a temporary mechanism for enabling/disabling THP process wide. Implementation wise, a new MMF_DISABLE_THP flag is added. This flag is tested when decision whether to use huge pages is taken either during page fault of at the time of THP collapse. It should be noted, that the new implementation makes PR_SET_THP_DISABLE master override to any per-VMA setting, which was not the case previously. Fixes: a0715cc22601 ("mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE") Link: http://lkml.kernel.org/r/1496415802-30944-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Michal Hocko Signed-off-by: Mike Rapoport Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Arnd Bergmann Cc: "Kirill A. Shutemov" Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 1 + include/linux/khugepaged.h | 3 ++- include/linux/sched/coredump.h | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index d3b3e8fcc717..40d7b7dd2653 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -92,6 +92,7 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); (1<vm_flags & VM_HUGEPAGE))) && \ !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ + !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) && \ !is_vma_temporary_stack(__vma)) #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 5d9a400af509..f0d7335336cd 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -48,7 +48,8 @@ static inline int khugepaged_enter(struct vm_area_struct *vma, if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) && - !(vm_flags & VM_NOHUGEPAGE)) + !(vm_flags & VM_NOHUGEPAGE) && + !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) if (__khugepaged_enter(vma->vm_mm)) return -ENOMEM; return 0; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 69eedcef8f03..98ae0d05aa32 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -68,7 +68,10 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 7ab0e50ad0831e714dcdc3de44a7fe3887732b7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 10 Jul 2017 15:48:18 -0700 Subject: oom, trace: remove ENUM evaluation of COMPACTION_FEEDBACK After enabling CONFIG_TRACE_ENUM_MAP_FILE (which will soon be renamed to CONFIG_TRACE_EVAL_MAP_FILE), I am able to examine the enums that have been evaluated: # cat /sys/kernel/debug/tracing/enum_map (which will soon be renamed to eval_map) And it showed some interesting results: [..] ZONE_MOVABLE 3 (oom) ZONE_NORMAL 2 (oom) ZONE_DMA32 1 (oom) ZONE_DMA 0 (oom) 3 3 (oom) 2 2 (oom) 1 1 (oom) COMPACT_PRIO_ASYNC 2 (oom) COMPACT_PRIO_SYNC_LIGHT 1 (oom) COMPACT_PRIO_SYNC_FULL 0 (oom) [..] ZONE_DMA 0 (vmscan) 3 3 (vmscan) 2 2 (vmscan) 1 1 (vmscan) COMPACT_PRIO_ASYNC 2 (vmscan) [..] ZONE_DMA 0 (kmem) 3 3 (kmem) 2 2 (kmem) 1 1 (kmem) COMPACT_PRIO_ASYNC 2 (kmem) [..] ZONE_DMA 0 (compaction) 3 3 (compaction) 2 2 (compaction) 1 1 (compaction) COMPACT_PRIO_ASYNC 2 (compaction) [..] The name within the parenthesis are the trace systems that the enum/eval maps are associated with. When there's a number evaluated to another number, that tells me that the TRACE_DEFINE_ENUM() was used on a #define and not an enum. As #defines get converted normally, they are not needed to be evaluated. Each of the above trace systems with the number to number evaluation included the file include/trace/events/mmflags.h which has: /* High-level compaction status feedback */ #define COMPACTION_FAILED 1 #define COMPACTION_WITHDRAWN 2 #define COMPACTION_PROGRESS 3 [..] #define COMPACTION_FEEDBACK \ EM(COMPACTION_FAILED, "failed") \ EM(COMPACTION_WITHDRAWN, "withdrawn") \ EMe(COMPACTION_PROGRESS, "progress") Which is still needed for the __print_symbolic() usage in the trace_event. But it is not needed to be evaluated. Removing the evaluation part removes the unnecessary evaluations of numbers to numbers. Link: http://lkml.kernel.org/r/20170615074944.7be9a647@gandalf.local.home Signed-off-by: Steven Rostedt (VMware) Cc: Michal Hocko Cc: Vlastimil Babka Cc: David Rientjes Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/mmflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 304ff94363b2..10e3663a75a6 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -257,7 +257,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ COMPACTION_STATUS COMPACTION_PRIORITY -COMPACTION_FEEDBACK +/* COMPACTION_FEEDBACK are defines not enums. Not needed here. */ ZONE_TYPE LRU_NAMES -- cgit v1.2.3 From 16981d763501c0e06e434cf6b59f964c520e0ccc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 10 Jul 2017 15:48:22 -0700 Subject: mm: improve readability of transparent_hugepage_enabled() Turn the macro into a static inline and rewrite the condition checks for better readability in preparation for adding another condition. [ross.zwisler@linux.intel.com: fix logic to make conversion equivalent] [akpm@linux-foundation.org: resolve vs mm-make-pr_set_thp_disable-immediately-active.patch] [akpm@linux-foundation.org: include coredump.h for MMF_DISABLE_THP] Link: http://lkml.kernel.org/r/149739530612.20686.14760671150202647861.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Ross Zwisler Acked-by: "Kirill A. Shutemov" Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 40d7b7dd2653..f4239d3c9c73 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -1,6 +1,8 @@ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H +#include + extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, @@ -85,15 +87,29 @@ extern struct kobj_attribute shmem_enabled_attr; extern bool is_vma_temporary_stack(struct vm_area_struct *vma); -#define transparent_hugepage_enabled(__vma) \ - ((transparent_hugepage_flags & \ - (1<vm_flags & VM_HUGEPAGE))) && \ - !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ - !test_bit(MMF_DISABLE_THP, &(__vma)->vm_mm->flags) && \ - !is_vma_temporary_stack(__vma)) +extern unsigned long transparent_hugepage_flags; + +static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_NOHUGEPAGE) + return false; + + if (is_vma_temporary_stack(vma)) + return false; + + if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) + return false; + + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) + return true; + + if (transparent_hugepage_flags & + (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) + return !!(vma->vm_flags & VM_HUGEPAGE); + + return false; +} + #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1< Date: Mon, 10 Jul 2017 15:48:25 -0700 Subject: mm: always enable thp for dax mappings The madvise policy for transparent huge pages is meant to avoid unwanted allocations of transparent huge pages. It allows a policy of disabling the extra memory pressure and effort to arrange for a huge page when it is not needed. DAX by definition never incurs this overhead since it is statically allocated. The policy choice makes even less sense for device-dax which tries to guarantee a given tlb-fault size. Specifically, the following setting: echo never > /sys/kernel/mm/transparent_hugepage/enabled ...violates that guarantee and silently disables all device-dax instances with a 2M or 1G alignment. So, let's avoid that non-obvious side effect by force enabling thp for dax mappings in all cases. It is worth noting that the reason this uses vma_is_dax(), and the resulting header include changes, is that previous attempts to add a VM_DAX flag were NAKd. Link: http://lkml.kernel.org/r/149739531127.20686.15813586620597484283.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Ross Zwisler Cc: Jan Kara Cc: Christoph Hellwig Cc: "Kirill A. Shutemov" Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 5 ----- include/linux/fs.h | 6 ++++++ include/linux/huge_mm.h | 5 +++++ 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/dax.h b/include/linux/dax.h index 8f39db7439c3..794811875732 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -154,11 +154,6 @@ static inline unsigned int dax_radix_order(void *entry) #endif int dax_pfn_mkwrite(struct vm_fault *vmf); -static inline bool vma_is_dax(struct vm_area_struct *vma) -{ - return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); -} - static inline bool dax_mapping(struct address_space *mapping) { return mapping->host && IS_DAX(mapping->host); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cfa47125d52..78e1dbbe4cfd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -3127,6 +3128,11 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); } +static inline bool vma_is_dax(struct vm_area_struct *vma) +{ + return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f4239d3c9c73..ee696347f928 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -3,6 +3,8 @@ #include +#include /* only for vma_is_dax() */ + extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf); extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, @@ -103,6 +105,9 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; + if (vma_is_dax(vma)) + return true; + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) return !!(vma->vm_flags & VM_HUGEPAGE); -- cgit v1.2.3 From 108a7ac448caff8e35e8c3f92f65faad893e5aca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jul 2017 15:48:28 -0700 Subject: include/linux/page_ref.h: ensure page_ref_unfreeze is ordered against prior accesses page_ref_freeze and page_ref_unfreeze are designed to be used as a pair, wrapping a critical section where struct pages can be modified without having to worry about consistency for a concurrent fast-GUP. Whilst page_ref_freeze has full barrier semantics due to its use of atomic_cmpxchg, page_ref_unfreeze is implemented using atomic_set, which doesn't provide any barrier semantics and allows the operation to be reordered with respect to page modifications in the critical section. This patch ensures that page_ref_unfreeze is ordered after any critical section updates, by invoking smp_mb() prior to the atomic_set. Link: http://lkml.kernel.org/r/1497349722-6731-3-git-send-email-will.deacon@arm.com Signed-off-by: Will Deacon Acked-by: Steve Capper Acked-by: Kirill A. Shutemov Cc: Mark Rutland Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ref.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 610e13271918..1fd71733aa68 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -174,6 +174,7 @@ static inline void page_ref_unfreeze(struct page *page, int count) VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); + smp_mb(); atomic_set(&page->_refcount, count); if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) __page_ref_unfreeze(page, count); -- cgit v1.2.3 From 4db9b2efe94967be34e3b136a93251a3c1736dd5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:44 -0700 Subject: hugetlb, memory_hotplug: prefer to use reserved pages for migration new_node_page will try to use the origin's next NUMA node as the migration destination for hugetlb pages. If such a node doesn't have any preallocated pool it falls back to __alloc_buddy_huge_page_no_mpol to allocate a surplus page instead. This is quite subotpimal for any configuration when hugetlb pages are no distributed to all NUMA nodes evenly. Say we have a hotplugable node 4 and spare hugetlb pages are node 0 /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node3/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node4/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node5/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node6/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node7/hugepages/hugepages-2048kB/nr_hugepages:0 Now we consume the whole pool on node 4 and try to offline this node. All the allocated pages should be moved to node0 which has enough preallocated pages to hold them. With the current implementation offlining very likely fails because hugetlb allocations during runtime are much less reliable. Fix this by reusing the nodemask which excludes migration source and try to find a first node which has a page in the preallocated pool first and fall back to __alloc_buddy_huge_page_no_mpol only when the whole pool is consumed. [akpm@linux-foundation.org: remove bogus arg from alloc_huge_page_nodemask() stub] Link: http://lkml.kernel.org/r/20170608074553.22152-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: zhong jiang Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 57f700ac127e..8fd0725d3f30 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,6 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); +struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -524,6 +525,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL +#define alloc_huge_page_nodemask(h, nmask) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL -- cgit v1.2.3 From 8b9132388964df2cfe151a88fd1dd8219dabf23c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:48:47 -0700 Subject: mm: unify new_node_page and alloc_migrate_target Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") has duplicated a large part of alloc_migrate_target with some hotplug specific special casing. To be more precise it tried to enfore the allocation from a different node than the original page. As a result the two function diverged in their shared logic, e.g. the hugetlb allocation strategy. Let's unify the two and express different NUMA requirements by the given nodemask. new_node_page will simply exclude the node it doesn't care about and alloc_migrate_target will use all the available nodes. alloc_migrate_target will then learn to migrate hugetlb pages more sanely and use preallocated pool when possible. Please note that alloc_migrate_target used to call alloc_page resp. alloc_pages_current so the memory policy of the current context which is quite strange when we consider that it is used in the context of alloc_contig_range which just tries to migrate pages which stand in the way. Link: http://lkml.kernel.org/r/20170608074553.22152-4-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Naoya Horiguchi Cc: Xishi Qiu Cc: zhong jiang Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 48e24844b3c5..d9675b665cc4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -4,6 +4,7 @@ #include #include #include +#include typedef struct page *new_page_t(struct page *page, unsigned long private, int **reason); @@ -30,6 +31,21 @@ enum migrate_reason { /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ extern char *migrate_reason_names[MR_TYPES]; +static inline struct page *new_page_nodemask(struct page *page, + int preferred_nid, nodemask_t *nodemask) +{ + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + + if (PageHuge(page)) + return alloc_huge_page_nodemask(page_hstate(compound_head(page)), + nodemask); + + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) + gfp_mask |= __GFP_HIGHMEM; + + return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask); +} + #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -- cgit v1.2.3 From 422580c3cea7faaca67f6199375b79565d3d8ebd Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 10 Jul 2017 15:49:05 -0700 Subject: mm/oom_kill.c: add tracepoints for oom reaper-related events During the debugging of the problem described in https://lkml.org/lkml/2017/5/17/542 and fixed by Tetsuo Handa in https://lkml.org/lkml/2017/5/19/383 , I've found that the existing debug output is not really useful to understand issues related to the oom reaper. So, I assume, that adding some tracepoints might help with debugging of similar issues. Trace the following events: 1) a process is marked as an oom victim, 2) a process is added to the oom reaper list, 3) the oom reaper starts reaping process's mm, 4) the oom reaper finished reaping, 5) the oom reaper skips reaping. How it works in practice? Below is an example which show how the problem mentioned above can be found: one process is added twice to the oom_reaper list: $ cd /sys/kernel/debug/tracing $ echo "oom:mark_victim" > set_event $ echo "oom:wake_reaper" >> set_event $ echo "oom:skip_task_reaping" >> set_event $ echo "oom:start_task_reaping" >> set_event $ echo "oom:finish_task_reaping" >> set_event $ cat trace_pipe allocate-502 [001] .... 91.836405: mark_victim: pid=502 allocate-502 [001] .N.. 91.837356: wake_reaper: pid=502 allocate-502 [000] .N.. 91.871149: wake_reaper: pid=502 oom_reaper-23 [000] .... 91.871177: start_task_reaping: pid=502 oom_reaper-23 [000] .N.. 91.879511: finish_task_reaping: pid=502 oom_reaper-23 [000] .... 91.879580: skip_task_reaping: pid=502 Link: http://lkml.kernel.org/r/20170530185231.GA13412@castle Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/oom.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'include') diff --git a/include/trace/events/oom.h b/include/trace/events/oom.h index 38baeb27221a..c3c19d47ae5e 100644 --- a/include/trace/events/oom.h +++ b/include/trace/events/oom.h @@ -70,6 +70,86 @@ TRACE_EVENT(reclaim_retry_zone, __entry->wmark_check) ); +TRACE_EVENT(mark_victim, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + +TRACE_EVENT(wake_reaper, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + +TRACE_EVENT(start_task_reaping, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + +TRACE_EVENT(finish_task_reaping, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + +TRACE_EVENT(skip_task_reaping, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + #ifdef CONFIG_COMPACTION TRACE_EVENT(compact_retry, -- cgit v1.2.3 From aaf14e40a33a2c9350471387031ca40c00f5a006 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:08 -0700 Subject: mm, hugetlb: unclutter hugetlb allocation layers Patch series "mm, hugetlb: allow proper node fallback dequeue". While working on a hugetlb migration issue addressed in a separate patchset[1] I have noticed that the hugetlb allocations from the preallocated pool are quite subotimal. [1] //lkml.kernel.org/r/20170608074553.22152-1-mhocko@kernel.org There is no fallback mechanism implemented and no notion of preferred node. I have tried to work around it but Vlastimil was right to push back for a more robust solution. It seems that such a solution is to reuse zonelist approach we use for the page alloctor. This series has 3 patches. The first one tries to make hugetlb allocation layers more clear. The second one implements the zonelist hugetlb pool allocation and introduces a preferred node semantic which is used by the migration callbacks. The last patch is a clean up. This patch (of 3): Hugetlb allocation path for fresh huge pages is unnecessarily complex and it mixes different interfaces between layers. __alloc_buddy_huge_page is the central place to perform a new allocation. It checks for the hugetlb overcommit and then relies on __hugetlb_alloc_buddy_huge_page to invoke the page allocator. This is all good except that __alloc_buddy_huge_page pushes vma and address down the callchain and so __hugetlb_alloc_buddy_huge_page has to deal with two different allocation modes - one for memory policy and other node specific (or to make it more obscure node non-specific) requests. This just screams for a reorganization. This patch pulls out all the vma specific handling up to __alloc_buddy_huge_page_with_mpol where it belongs. __alloc_buddy_huge_page will get nodemask argument and __hugetlb_alloc_buddy_huge_page will become a trivial wrapper over the page allocator. In short: __alloc_buddy_huge_page_with_mpol - memory policy handling __alloc_buddy_huge_page - overcommit handling and accounting __hugetlb_alloc_buddy_huge_page - page allocator layer Also note that __hugetlb_alloc_buddy_huge_page and its cpuset retry loop is not really needed because the page allocator already handles the cpusets update. Finally __hugetlb_alloc_buddy_huge_page had a special case for node specific allocations (when no policy is applied and there is a node given). This has relied on __GFP_THISNODE to not fallback to a different node. alloc_huge_page_node is the only caller which relies on this behavior so move the __GFP_THISNODE there. Not only does this remove quite some code it also should make those layers easier to follow and clear wrt responsibilities. Link: http://lkml.kernel.org/r/20170622193034.28972-2-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8fd0725d3f30..66b621469f52 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,7 +349,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask); +struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); -- cgit v1.2.3 From 3e59fcb0e8c1c40aecb60fa4c2d1543d6a097184 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:49:11 -0700 Subject: hugetlb: add support for preferred node to alloc_huge_page_nodemask alloc_huge_page_nodemask tries to allocate from any numa node in the allowed node mask starting from lower numa nodes. This might lead to filling up those low NUMA nodes while others are not used. We can reduce this risk by introducing a concept of the preferred node similar to what we have in the regular page allocator. We will start allocating from the preferred nid and then iterate over all allowed nodes in the zonelist order until we try them all. This is mimicing the page allocator logic except it operates on per-node mempools. dequeue_huge_page_vma already does this so distill the zonelist logic into a more generic dequeue_huge_page_nodemask and use it in alloc_huge_page_nodemask. This will allow us to use proper per numa distance fallback also for alloc_huge_page_node which can use alloc_huge_page_nodemask now and we can get rid of alloc_huge_page_node helper which doesn't have any user anymore. Link: http://lkml.kernel.org/r/20170622193034.28972-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Mike Kravetz Tested-by: Mike Kravetz Cc: Naoya Horiguchi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 5 +++-- include/linux/migrate.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 66b621469f52..8d9fe131a240 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,7 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask); +struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -525,7 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL -#define alloc_huge_page_nodemask(h, nmask) NULL +#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL diff --git a/include/linux/migrate.h b/include/linux/migrate.h index d9675b665cc4..4634da521238 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -38,7 +38,7 @@ static inline struct page *new_page_nodemask(struct page *page, if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - nodemask); + preferred_nid, nodemask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; -- cgit v1.2.3 From 618b8c20d03c9ea06711bd36d906322ba35c0add Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 10 Jul 2017 15:49:32 -0700 Subject: include/linux/mmzone.h: remove ancient/ambiguous comment Currently pg_data_t is just a struct which describes a NUMA node memory layout. Let's keep the comment simple and remove ambiguity. Link: http://lkml.kernel.org/r/1498220534-22717-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7e8f100cb56d..16532fa0bb64 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -603,12 +603,9 @@ extern struct page *mem_map; #endif /* - * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM - * (mostly NUMA machines?) to denote a higher-level memory zone than the - * zone denotes. - * * On NUMA machines, each NUMA node would have a pg_data_t to describe - * it's memory layout. + * it's memory layout. On UMA machines there is a single pglist_data which + * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. -- cgit v1.2.3 From e3d3910a57ab9c70cddb2522ae711ff9bff89e7c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 10 Jul 2017 15:49:35 -0700 Subject: include/linux/backing-dev.h: simplify wb_stat_sum wb_stat_sum() disables interrupts and calls __wb_stat_sum() which eventually calls __percpu_counter_sum(). However, the percpu routine is already irq-safe. Simplify the code a bit by making wb_stat_sum() directly call percpu_counter_sum_positive() and not disable interrupts. Also remove the now-uneeded __wb_stat_sum() which was just a wrapper over percpu_counter_sum_positive(). Link: http://lkml.kernel.org/r/1498230681-29103-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Peter Zijlstra Cc: Tejun Heo Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ace73f96eb1e..334165c911f0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,22 +104,9 @@ static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) return percpu_counter_read_positive(&wb->stat[item]); } -static inline s64 __wb_stat_sum(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - return percpu_counter_sum_positive(&wb->stat[item]); -} - static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item) { - s64 sum; - unsigned long flags; - - local_irq_save(flags); - sum = __wb_stat_sum(wb, item); - local_irq_restore(flags); - - return sum; + return percpu_counter_sum_positive(&wb->stat[item]); } extern void wb_writeout_inc(struct bdi_writeback *wb); -- cgit v1.2.3 From 2c80cd57c74339889a8752b20862a16c28929c3a Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 10 Jul 2017 15:49:57 -0700 Subject: mm/list_lru.c: fix list_lru_count_node() to be race free list_lru_count_node() iterates over all memcgs to get the total number of entries on the node but it can race with memcg_drain_all_list_lrus(), which migrates the entries from a dead cgroup to another. This can return incorrect number of entries from list_lru_count_node(). Fix this by keeping track of entries per node and simply return it in list_lru_count_node(). Link: http://lkml.kernel.org/r/1498707555-30525-1-git-send-email-stummala@codeaurora.org Signed-off-by: Sahitya Tummala Acked-by: Vladimir Davydov Cc: Jan Kara Cc: Alexander Polakov Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index cb0ba9f2a9a2..fa7fd03cb5f9 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -44,6 +44,7 @@ struct list_lru_node { /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg *memcg_lrus; #endif + long nr_items; } ____cacheline_aligned_in_smp; struct list_lru { -- cgit v1.2.3 From a47fed5b5b014f5a13878b90ef2c3a7dc294189f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jul 2017 15:50:06 -0700 Subject: mm: swap: provide lru_add_drain_all_cpuslocked() The rework of the cpu hotplug locking unearthed potential deadlocks with the memory hotplug locking code. The solution for these is to rework the memory hotplug locking code as well and take the cpu hotplug lock before the memory hotplug lock in mem_hotplug_begin(), but this will cause a recursive locking of the cpu hotplug lock when the memory hotplug code calls lru_add_drain_all(). Split out the inner workings of lru_add_drain_all() into lru_add_drain_all_cpuslocked() so this function can be invoked from the memory hotplug code with the cpu hotplug lock held. Link: http://lkml.kernel.org/r/20170704093421.419329357@linutronix.de Signed-off-by: Thomas Gleixner Reported-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Vladimir Davydov Cc: Peter Zijlstra Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 61e7180cee21..d83d28e53e62 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -277,6 +277,7 @@ extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); +extern void lru_add_drain_all_cpuslocked(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void mark_page_lazyfree(struct page *page); -- cgit v1.2.3 From 9d1f4b3f5b29bea431525e528a3ff2dc806ad904 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 10 Jul 2017 15:50:12 -0700 Subject: mm: disallow early_pfn_to_nid on configurations which do not implement it early_pfn_to_nid will return node 0 if both HAVE_ARCH_EARLY_PFN_TO_NID and HAVE_MEMBLOCK_NODE_MAP are disabled. It seems we are safe now because all architectures which support NUMA define one of them (with an exception of alpha which however has CONFIG_NUMA marked as broken) so this works as expected. It can get silently and subtly broken too easily, though. Make sure we fail the compilation if NUMA is enabled and there is no proper implementation for this function. If that ever happens we know that either the specific configuration is invalid and the fix should either disable NUMA or enable one of the above configs. Link: http://lkml.kernel.org/r/20170704075803.15979-1-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Yang Shi Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 16532fa0bb64..fc14b8b3f6ce 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1055,6 +1055,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) static inline unsigned long early_pfn_to_nid(unsigned long pfn) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)); return 0; } #endif -- cgit v1.2.3 From 0b396923ee9bdcb4a208df2148712b79b6dee73e Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:50:55 -0700 Subject: asm-generic/bug.h: declare struct pt_regs; before function prototype This series of patches splits BUILD_BUG related macros out of "include/linux/bug.h" into new file "include/linux/build_bug.h" (patch 5), and changes the pointer type checking in the `container_of()` macro to deal with pointers of array type better (patch 6). Patches 1 to 4 are prerequisites. Patches 2, 3, 4, and 5 have been inserted since the previous version of this patch series. Patch 6 here corresponds to v3 and v4's patch 2. Patch 1 was a prerequisite in v3 of this series to avoid a lot of warnings when was included by . That is no longer relevant for v5 of the series, but I left it in because it was acked by a Arnd Bergmann and Michal Nazarewicz. Patches 2, 3, and 4 are some checkpatch clean-ups on "include/linux/bug.h" before splitting out the BUILD_BUG stuff in patch 5. Patch 5 splits the BUILD_BUG related macros out of "include/linux/bug.h" into new file "include/linux/build_bug.h" because including in "include/linux/kernel.h" would result in build failures due to circular dependencies. Patch 6 changes the pointer type checking by `container_of()` to avoid some incompatible pointer warnings when the dereferenced pointer has array type. 1) asm-generic/bug.h: declare struct pt_regs; before function prototype 2) linux/bug.h: correct formatting of block comment 3) linux/bug.h: correct "(foo*)" should be "(foo *)" 4) linux/bug.h: correct "space required before that '-'" 5) bug: split BUILD_BUG stuff out into 6) kernel.h: handle pointers to arrays better in container_of() This patch (of 6): The declaration of `__warn()` has `struct pt_regs *regs` as one of its parameters. This can result in compiler warnings if `struct regs` is not already declared. Add an empty declaration of `struct pt_regs` to avoid the warnings. Link: http://lkml.kernel.org/r/20170525120316.24473-2-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Arnd Bergmann Acked-by: Michal Nazarewicz Cc: Arnd Bergmann Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index d6f4aed479a1..87191357d303 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -97,6 +97,7 @@ extern void warn_slowpath_null(const char *file, const int line); /* used internally by panic.c */ struct warn_args; +struct pt_regs; void __warn(const char *file, int line, void *caller, unsigned taint, struct pt_regs *regs, struct warn_args *args); -- cgit v1.2.3 From e9d5a48499391fe5b0615610858665ba8149e255 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:50:58 -0700 Subject: linux/bug.h: correct formatting of block comment Correct these checkpatch.pl warnings: |WARNING: Block comments use * on subsequent lines |#34: FILE: include/linux/bug.h:34: |+/* Force a compilation error if condition is true, but also produce a |+ result (of value 0 and type size_t), so the expression can be used |WARNING: Block comments use a trailing */ on a separate line |#36: FILE: include/linux/bug.h:36: |+ aren't permitted). */ Link: http://lkml.kernel.org/r/20170525120316.24473-3-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bug.h b/include/linux/bug.h index 687b557fc5eb..ca24007e2dc3 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -30,10 +30,12 @@ struct pt_regs; #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) -/* Force a compilation error if condition is true, but also produce a - result (of value 0 and type size_t), so the expression can be used - e.g. in a structure initializer (or where-ever else comma expressions - aren't permitted). */ +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type size_t), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ #define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) -- cgit v1.2.3 From 8cdd7cca9287abf4c849c01e2a4e8207ad3e3a82 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:01 -0700 Subject: linux/bug.h: correct "(foo*)" should be "(foo *)" Correct this checkpatch.pl error: |ERROR: "(foo*)" should be "(foo *)" |#19: FILE: include/linux/bug.h:19: |+#define BUILD_BUG_ON_NULL(e) ((void*)0) Link: http://lkml.kernel.org/r/20170525120316.24473-4-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bug.h b/include/linux/bug.h index ca24007e2dc3..216a1b79653d 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -16,7 +16,7 @@ struct pt_regs; #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) -#define BUILD_BUG_ON_NULL(e) ((void*)0) +#define BUILD_BUG_ON_NULL(e) ((void *)0) #define BUILD_BUG_ON_INVALID(e) (0) #define BUILD_BUG_ON_MSG(cond, msg) (0) #define BUILD_BUG_ON(condition) (0) -- cgit v1.2.3 From 47e81e59d98b90727a02ceb486407eeed5eb8727 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:04 -0700 Subject: linux/bug.h: correct "space required before that '-'" Correct these checkpatch.pl errors: |ERROR: space required before that '-' (ctx:OxO) |#37: FILE: include/linux/bug.h:37: |+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) |ERROR: space required before that '-' (ctx:OxO) |#38: FILE: include/linux/bug.h:38: |+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) I decided to wrap the bitfield expressions that begin with minus signs in parentheses rather than insert spaces before the minus signs. Link: http://lkml.kernel.org/r/20170525120316.24473-5-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Cc: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bug.h b/include/linux/bug.h index 216a1b79653d..483207cb99fb 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -36,8 +36,8 @@ struct pt_regs; * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) /* * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the -- cgit v1.2.3 From bc6245e5efd70c41eaf9334b1b5e646745cb0fb3 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 10 Jul 2017 15:51:07 -0700 Subject: bug: split BUILD_BUG stuff out into Including pulls in a lot of bloat from and that is not needed to call the BUILD_BUG() family of macros. Split them out into their own header, . Also correct some checkpatch.pl errors for the BUILD_BUG_ON_ZERO() and BUILD_BUG_ON_NULL() macros by adding parentheses around the bitfield widths that begin with a minus sign. Link: http://lkml.kernel.org/r/20170525120316.24473-6-abbotti@mev.co.uk Signed-off-by: Ian Abbott Acked-by: Michal Nazarewicz Acked-by: Kees Cook Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Jakub Kicinski Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 74 +---------------------------------------- include/linux/build_bug.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 73 deletions(-) create mode 100644 include/linux/build_bug.h (limited to 'include') diff --git a/include/linux/bug.h b/include/linux/bug.h index 483207cb99fb..5d5554c874fd 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -3,6 +3,7 @@ #include #include +#include enum bug_trap_type { BUG_TRAP_TYPE_NONE = 0, @@ -13,82 +14,9 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ -#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) -#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) -#define BUILD_BUG_ON_ZERO(e) (0) -#define BUILD_BUG_ON_NULL(e) ((void *)0) -#define BUILD_BUG_ON_INVALID(e) (0) -#define BUILD_BUG_ON_MSG(cond, msg) (0) -#define BUILD_BUG_ON(condition) (0) -#define BUILD_BUG() (0) #define MAYBE_BUILD_BUG_ON(cond) (0) #else /* __CHECKER__ */ -/* Force a compilation error if a constant expression is not a power of 2 */ -#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ - BUILD_BUG_ON(((n) & ((n) - 1)) != 0) -#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ - BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) - -/* - * Force a compilation error if condition is true, but also produce a - * result (of value 0 and type size_t), so the expression can be used - * e.g. in a structure initializer (or where-ever else comma expressions - * aren't permitted). - */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) -#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) - -/* - * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the - * expression but avoids the generation of any code, even if that expression - * has side-effects. - */ -#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) - -/** - * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied - * error message. - * @condition: the condition which the compiler should know is false. - * - * See BUILD_BUG_ON for description. - */ -#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) - -/** - * BUILD_BUG_ON - break compile if a condition is true. - * @condition: the condition which the compiler should know is false. - * - * If you have some code which relies on certain constants being equal, or - * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to - * detect if someone changes it. - * - * The implementation uses gcc's reluctance to create a negative array, but gcc - * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to - * inline functions). Luckily, in 4.3 they added the "error" function - * attribute just for this type of case. Thus, we use a negative sized array - * (should always create an error on gcc versions older than 4.4) and then call - * an undefined function with the error attribute (should always create an - * error on gcc 4.3 and later). If for some reason, neither creates a - * compile-time error, we'll still have a link-time error, which is harder to - * track down. - */ -#ifndef __OPTIMIZE__ -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#else -#define BUILD_BUG_ON(condition) \ - BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) -#endif - -/** - * BUILD_BUG - break compile if used. - * - * If you have some code that you expect the compiler to eliminate at - * build time, you should use BUILD_BUG to detect if it is - * unexpectedly used. - */ -#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") - #define MAYBE_BUILD_BUG_ON(cond) \ do { \ if (__builtin_constant_p((cond))) \ diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h new file mode 100644 index 000000000000..b7d22d60008a --- /dev/null +++ b/include/linux/build_bug.h @@ -0,0 +1,84 @@ +#ifndef _LINUX_BUILD_BUG_H +#define _LINUX_BUILD_BUG_H + +#include + +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) +#define BUILD_BUG_ON_ZERO(e) (0) +#define BUILD_BUG_ON_NULL(e) ((void *)0) +#define BUILD_BUG_ON_INVALID(e) (0) +#define BUILD_BUG_ON_MSG(cond, msg) (0) +#define BUILD_BUG_ON(condition) (0) +#define BUILD_BUG() (0) +#else /* __CHECKER__ */ + +/* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + +/* + * Force a compilation error if condition is true, but also produce a + * result (of value 0 and type size_t), so the expression can be used + * e.g. in a structure initializer (or where-ever else comma expressions + * aren't permitted). + */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:(-!!(e)); })) + +/* + * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the + * expression but avoids the generation of any code, even if that expression + * has side-effects. + */ +#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e)))) + +/** + * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied + * error message. + * @condition: the condition which the compiler should know is false. + * + * See BUILD_BUG_ON for description. + */ +#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but gcc + * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to + * inline functions). Luckily, in 4.3 they added the "error" function + * attribute just for this type of case. Thus, we use a negative sized array + * (should always create an error on gcc versions older than 4.4) and then call + * an undefined function with the error attribute (should always create an + * error on gcc 4.3 and later). If for some reason, neither creates a + * compile-time error, we'll still have a link-time error, which is harder to + * track down. + */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +#define BUILD_BUG_ON(condition) \ + BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) +#endif + +/** + * BUILD_BUG - break compile if used. + * + * If you have some code that you expect the compiler to eliminate at + * build time, you should use BUILD_BUG to detect if it is + * unexpectedly used. + */ +#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed") + +#endif /* __CHECKER__ */ + +#endif /* _LINUX_BUILD_BUG_H */ -- cgit v1.2.3 From 287f3ca563d8ba0ede4ac0cec84218a1ea5e848f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 10 Jul 2017 15:51:10 -0700 Subject: ARM: fix rd_size declaration The global variable 'rd_size' is declared as 'int' in source file arch/arm/kernel/atags_parse.c and as 'unsigned long' in drivers/block/brd.c. Fix this inconsistency. Additionally, remove the declarations of rd_image_start, rd_prompt and rd_doload from parse_tag_ramdisk() since these duplicate existing declarations in . Link: http://lkml.kernel.org/r/20170627065024.12347-1-bart.vanassche@wdc.com Signed-off-by: Bart Van Assche Acked-by: Russell King Cc: Jens Axboe Cc: Jan Kara Cc: Jason Yan Cc: Zhaohongjiang Cc: Miao Xie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/initrd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/initrd.h b/include/linux/initrd.h index 55289d261b4f..bc67b767f9ce 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -10,6 +10,9 @@ extern int rd_prompt; /* starting block # of image */ extern int rd_image_start; +/* size of a single RAM disk */ +extern unsigned long rd_size; + /* 1 if it is not an error if initrd_start < memory_start */ extern int initrd_below_start_ok; -- cgit v1.2.3 From e5af323c9badd5dc09af7ccf9d45616ebffc623c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:29 -0700 Subject: bitmap: optimise bitmap_set and bitmap_clear of a single bit We have eight users calling bitmap_clear for a single bit and seventeen calling bitmap_set for a single bit. Rather than fix all of them to call __clear_bit or __set_bit, turn bitmap_clear and bitmap_set into inline functions and make this special case efficient. Link: http://lkml.kernel.org/r/20170628153221.11322-3-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Rasmus Villemoes Cc: Martin Schwidefsky Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3b77588a9360..4e0f0c8167af 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -112,9 +112,8 @@ extern int __bitmap_intersects(const unsigned long *bitmap1, extern int __bitmap_subset(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); - -extern void bitmap_set(unsigned long *map, unsigned int start, int len); -extern void bitmap_clear(unsigned long *map, unsigned int start, int len); +extern void __bitmap_set(unsigned long *map, unsigned int start, int len); +extern void __bitmap_clear(unsigned long *map, unsigned int start, int len); extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map, unsigned long size, @@ -315,6 +314,24 @@ static __always_inline int bitmap_weight(const unsigned long *src, unsigned int return __bitmap_weight(src, nbits); } +static __always_inline void bitmap_set(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else + __bitmap_set(map, start, nbits); +} + +static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __clear_bit(start, map); + else + __bitmap_clear(map, start, nbits); +} + static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, int nbits) { -- cgit v1.2.3 From 2a98dc028f911a7c59c87d11d4eed6626be1605b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:32 -0700 Subject: include/linux/bitmap.h: turn bitmap_set and bitmap_clear into memset when possible Several callers have constant 'start' and an 'nbits' that is a multiple of 8, so we can turn them into calls to memset. We don't need the entirety of 'start' and 'nbits' to be constant, we just need to know whether they're divisible by 8. Link: http://lkml.kernel.org/r/20170628153221.11322-4-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Rasmus Villemoes Cc: Martin Schwidefsky Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 4e0f0c8167af..c04c9d155e59 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -319,6 +319,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); + else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && + __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + memset((char *)map + start / 8, 0xff, nbits / 8); else __bitmap_set(map, start, nbits); } @@ -328,6 +331,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); + else if (__builtin_constant_p(start & 7) && IS_ALIGNED(start, 8) && + __builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) + memset((char *)map + start / 8, 0, nbits / 8); else __bitmap_clear(map, start, nbits); } -- cgit v1.2.3 From 2c6deb01525ac11cc03c44fe31e3f45ce2cadaf9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 10 Jul 2017 15:51:35 -0700 Subject: bitmap: use memcmp optimisation in more situations Commit 7dd968163f7c ("bitmap: bitmap_equal memcmp optimization") was rather more restrictive than necessary; we can use memcmp() to implement bitmap_equal() as long as the number of bits can be proved to be a multiple of 8. And architectures other than s390 may be able to make good use of this optimisation. [arnd@arndb.de: fix build: add a memcmp() declaration] Link: http://lkml.kernel.org/r/20170630153908.3439707-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170628153221.11322-5-willy@infradead.org Signed-off-by: Matthew Wilcox Signed-off-by: Arnd Bergmann Acked-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c04c9d155e59..5797ca6fdfe2 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -266,10 +266,8 @@ static inline int bitmap_equal(const unsigned long *src1, { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); -#ifdef CONFIG_S390 - if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0) + if (__builtin_constant_p(nbits & 7) && IS_ALIGNED(nbits, 8)) return !memcmp(src1, src2, nbits / 8); -#endif return __bitmap_equal(src1, src2, nbits); } -- cgit v1.2.3 From a94c33dd1f677d16c4f1a162b4b3e9eba1b07c24 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Mon, 10 Jul 2017 15:51:58 -0700 Subject: lib/extable.c: use bsearch() library function in search_extable() [thomas@m3y3r.de: v3: fix arch specific implementations] Link: http://lkml.kernel.org/r/1497890858.12931.7.camel@m3y3r.de Signed-off-by: Thomas Meyer Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/extable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/extable.h b/include/linux/extable.h index 7effea4b257d..28addad0dda7 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -2,13 +2,14 @@ #define _LINUX_EXTABLE_H #include /* for NULL */ +#include struct module; struct exception_table_entry; const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, +search_extable(const struct exception_table_entry *base, + const size_t num, unsigned long value); void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish); -- cgit v1.2.3 From c4fac9100456995c10b65c13be84554258ed7fc8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jul 2017 16:25:37 +0100 Subject: 9p: Implement show_options Implement the show_options superblock op for 9p as part of a bid to get rid of s_options and generic_show_options() to make it easier to implement a context-based mount where the mount options can be passed individually over a file descriptor. Signed-off-by: David Howells cc: Eric Van Hensbergen cc: Ron Minnich cc: Latchesar Ionkov cc: v9fs-developer@lists.sourceforge.net Signed-off-by: Al Viro --- include/net/9p/client.h | 13 +++++++++++++ include/net/9p/transport.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index b582339ccef5..7af9d769b97d 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -157,6 +157,18 @@ struct p9_client { enum p9_trans_status status; void *trans; + union { + struct { + int rfd; + int wfd; + } fd; + struct { + u16 port; + bool privport; + + } tcp; + } trans_opts; + struct p9_idpool *fidpool; struct list_head fidlist; @@ -213,6 +225,7 @@ struct p9_dirent { struct iov_iter; +int p9_show_client_options(struct seq_file *m, struct p9_client *clnt); int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, const char *name); diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 5122b5e40f78..1625fb842ac4 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -62,6 +62,7 @@ struct p9_trans_module { int (*cancelled)(struct p9_client *, struct p9_req_t *req); int (*zc_request)(struct p9_client *, struct p9_req_t *, struct iov_iter *, struct iov_iter *, int , int, int); + int (*show_options)(struct seq_file *, struct p9_client *); }; void v9fs_register_trans(struct p9_trans_module *m); -- cgit v1.2.3 From 1d278a879081ddc40286500e58868aaee47de257 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Jul 2017 16:25:53 +0100 Subject: VFS: Kill off s_options and helpers Kill off s_options, save/replace_mount_options() and generic_show_options() as all filesystems now implement ->show_options() for themselves. This should make it easier to implement a context-based mount where the mount options can be passed individually over a file descriptor. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fs.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bc0c054894b9..e265b2ea72c6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1351,11 +1351,6 @@ struct super_block { */ char *s_subtype; - /* - * Saved mount options for lazy filesystems using - * generic_show_options() - */ - char __rcu *s_options; const struct dentry_operations *s_d_op; /* default d_op for dentries */ /* @@ -3033,10 +3028,6 @@ extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); -extern int generic_show_options(struct seq_file *m, struct dentry *root); -extern void save_mount_options(struct super_block *sb, char *options); -extern void replace_mount_options(struct super_block *sb, char *options); - static inline bool io_is_direct(struct file *filp) { return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); -- cgit v1.2.3 From 7461279bba4a62d192eda6dd10de68ac0543bfcd Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 17 Jun 2017 13:52:46 -0700 Subject: dt-bindings: Document img,boston-clock binding Add device tree binding documentation for the clocks provided by the MIPS Boston development board from Imagination Technologies, and a header file describing the available clocks for use by device trees & driver. Signed-off-by: Paul Burton Acked-by: Stephen Boyd Cc: Frank Rowand Cc: Michael Turquette Cc: Rob Herring Cc: devicetree@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16482/ Signed-off-by: Ralf Baechle --- include/dt-bindings/clock/boston-clock.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/clock/boston-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/boston-clock.h b/include/dt-bindings/clock/boston-clock.h new file mode 100644 index 000000000000..a6f009821137 --- /dev/null +++ b/include/dt-bindings/clock/boston-clock.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2016 Imagination Technologies + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#ifndef __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__ +#define __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__ + +#define BOSTON_CLK_INPUT 0 +#define BOSTON_CLK_SYS 1 +#define BOSTON_CLK_CPU 2 + +#endif /* __DT_BINDINGS_CLOCK_BOSTON_CLOCK_H__ */ -- cgit v1.2.3 From 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 6 Jul 2017 08:41:06 -0500 Subject: proc: Fix proc_sys_prune_dcache to hold a sb reference Andrei Vagin writes: FYI: This bug has been reproduced on 4.11.7 > BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc] > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80 > CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1 > Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015 > Workqueue: events proc_cleanup_work > Call Trace: > dump_stack+0x63/0x86 > __warn+0xcb/0xf0 > warn_slowpath_null+0x1d/0x20 > umount_check+0x6e/0x80 > d_walk+0xc6/0x270 > ? dentry_free+0x80/0x80 > do_one_tree+0x26/0x40 > shrink_dcache_for_umount+0x2d/0x90 > generic_shutdown_super+0x1f/0xf0 > kill_anon_super+0x12/0x20 > proc_kill_sb+0x40/0x50 > deactivate_locked_super+0x43/0x70 > deactivate_super+0x5a/0x60 > cleanup_mnt+0x3f/0x90 > mntput_no_expire+0x13b/0x190 > kern_unmount+0x3e/0x50 > pid_ns_release_proc+0x15/0x20 > proc_cleanup_work+0x15/0x20 > process_one_work+0x197/0x450 > worker_thread+0x4e/0x4a0 > kthread+0x109/0x140 > ? process_one_work+0x450/0x450 > ? kthread_park+0x90/0x90 > ret_from_fork+0x2c/0x40 > ---[ end trace e1c109611e5d0b41 ]--- > VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day... > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: _raw_spin_lock+0xc/0x30 > PGD 0 Fix this by taking a reference to the super block in proc_sys_prune_dcache. The superblock reference is the core of the fix however the sysctl_inodes list is converted to a hlist so that hlist_del_init_rcu may be used. This allows proc_sys_prune_dache to remove inodes the sysctl_inodes list, while not causing problems for proc_sys_evict_inode when if it later choses to remove the inode from the sysctl_inodes list. Removing inodes from the sysctl_inodes list allows proc_sys_prune_dcache to have a progress guarantee, while still being able to drop all locks. The fact that head->unregistering is set in start_unregistering ensures that no more inodes will be added to the the sysctl_inodes list. Previously the code did a dance where it delayed calling iput until the next entry in the list was being considered to ensure the inode remained on the sysctl_inodes list until the next entry was walked to. The structure of the loop in this patch does not need that so is much easier to understand and maintain. Cc: stable@vger.kernel.org Reported-by: Andrei Vagin Tested-by: Andrei Vagin Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.") Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering") Signed-off-by: "Eric W. Biederman" --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..1c04a26bfd2f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -143,7 +143,7 @@ struct ctl_table_header struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; - struct list_head inodes; /* head for proc_inode->sysctl_inodes */ + struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ }; struct ctl_dir { -- cgit v1.2.3 From 138d351eefb727ab9e41a3dc5f112ceb4f6e59f2 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 7 Jul 2017 14:45:49 -0700 Subject: iscsi-target: Add login_keys_workaround attribute for non RFC initiators This patch re-introduces part of a long standing login workaround that was recently dropped by: commit 1c99de981f30b3e7868b8d20ce5479fa1c0fea46 Author: Nicholas Bellinger Date: Sun Apr 2 13:36:44 2017 -0700 iscsi-target: Drop work-around for legacy GlobalSAN initiator Namely, the workaround for FirstBurstLength ended up being required by Mellanox Flexboot PXE boot ROMs as reported by Robert. So this patch re-adds the work-around for FirstBurstLength within iscsi_check_proposer_for_optional_reply(), and makes the key optional to respond when the initiator does not propose, nor respond to it. Also as requested by Arun, this patch introduces a new TPG attribute named 'login_keys_workaround' that controls the use of both the FirstBurstLength workaround, as well as the two other existing workarounds for gPXE iSCSI boot client. By default, the workaround is enabled with login_keys_workaround=1, since Mellanox FlexBoot requires it, and Arun has verified the Qlogic MSFT initiator already proposes FirstBurstLength, so it's uneffected by this re-adding this part of the original work-around. Reported-by: Robert LeBlanc Cc: Robert LeBlanc Reviewed-by: Arun Easi Cc: # 3.1+ Signed-off-by: Nicholas Bellinger --- include/target/iscsi/iscsi_target_core.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 7948fc68286b..0ca1fb08805b 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -66,6 +66,14 @@ struct sock; #define TA_DEFAULT_FABRIC_PROT_TYPE 0 /* TPG status needs to be enabled to return sendtargets discovery endpoint info */ #define TA_DEFAULT_TPG_ENABLED_SENDTARGETS 1 +/* + * Used to control the sending of keys with optional to respond state bit, + * as a workaround for non RFC compliant initiators,that do not propose, + * nor respond to specific keys required for login to complete. + * + * See iscsi_check_proposer_for_optional_reply() for more details. + */ +#define TA_DEFAULT_LOGIN_KEYS_WORKAROUND 1 #define ISCSI_IOV_DATA_BUFFER 5 @@ -768,6 +776,7 @@ struct iscsi_tpg_attrib { u8 t10_pi; u32 fabric_prot_type; u32 tpg_enabled_sendtargets; + u32 login_keys_workaround; struct iscsi_portal_group *tpg; }; -- cgit v1.2.3 From e8158b486d5f3f55cf372c5a32b42f263bf7f123 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 11 Jul 2017 18:20:20 +0300 Subject: device property: Introduce fwnode_call_bool_op() for ops that return bool fwnode_call_int_op() isn't suitable for calling ops that return bool since it effectively causes the result returned to the user to be true when an op hasn't been defined or the fwnode is NULL. Address this by introducing fwnode_call_bool_op() for calling ops that return bool. Fixes: 3708184afc77 "device property: Move FW type specific functionality to FW specific files" Fixes: 2294b3af05e9 "device property: Introduce fwnode_device_is_available()" Reported-by: Dan Carpenter Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9ab375419189..50893a1646cf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -99,6 +99,10 @@ struct fwnode_operations { (fwnode ? (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \ -EINVAL) +#define fwnode_call_bool_op(fwnode, op, ...) \ + (fwnode ? (fwnode_has_op(fwnode, op) ? \ + (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) : \ + false) #define fwnode_call_ptr_op(fwnode, op, ...) \ (fwnode_has_op(fwnode, op) ? \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL) -- cgit v1.2.3 From 8c6ae4980e70395cbdfdf605c29673c5a6a89d9a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 30 Jun 2017 12:03:54 -0400 Subject: sunrpc: Allocate up to RPCSVC_MAXPAGES per svc_rqst svcrdma needs 259 pages allocated to receive 1MB NFSv4.0 WRITE requests: - 1 page for the transport header and head iovec - 256 pages for the data payload - 1 page for the trailing GETATTR request (since NFSD XDR decoding does not look for a tail iovec, the GETATTR is stuck at the end of the rqstp->rq_arg.pages list) - 1 page for building the reply xdr_buf But RPCSVC_MAXPAGES is already 259 (on x86_64). The problem is that svc_alloc_arg never allocates that many pages. To address this: 1. The final element of rq_pages always points to NULL. To accommodate up to 259 pages in rq_pages, add an extra element to rq_pages for the array termination sentinel. 2. Adjust the calculation of "pages" to match how RPCSVC_MAXPAGES is calculated, so it can go up to 259. Bruce noted that the calculation assumes sv_max_mesg is a multiple of PAGE_SIZE, which might not always be true. I didn't change this assumption. 3. Change the loop boundaries to allow 259 pages to be allocated. Additional clean-up: WARN_ON_ONCE adds an extra conditional branch, which is basically never taken. And there's no need to dump the stack here because svc_alloc_arg has only one caller. Keeping that NULL "array termination sentinel"; there doesn't appear to be any code that depends on it, only code in nfsd_splice_actor() which needs the 259th element to be initialized to *something*. So it's possible we could just keep the array at 259 elements and drop that final NULL, but we're being conservative for now. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index eec04982a7ea..a3f8af9bd543 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -246,7 +246,7 @@ struct svc_rqst { size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; struct xdr_buf rq_res; - struct page * rq_pages[RPCSVC_MAXPAGES]; + struct page *rq_pages[RPCSVC_MAXPAGES + 1]; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ -- cgit v1.2.3 From 026d958b38c628a1b4ced534808945365e2747a5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:24 -0400 Subject: svcrdma: Add recvfrom helpers to svc_rdma_rw.c svc_rdma_rw.c already contains helpers for the sendto path. Introduce helpers for the recvfrom path. The plan is to replace the local NFSD bespoke code that constructs and posts RDMA Read Work Requests with calls to the rdma_rw API. This shares code with other RDMA-enabled ULPs that manages the gory details of buffer registration and posting Work Requests. This new code also puts all RDMA_NOMSG-specific logic in one place. Lastly, the use of rqstp->rq_arg.pages is deprecated in favor of using rqstp->rq_pages directly, for clarity. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 3ca991657889..cf5d5412298b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -196,6 +196,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, __be32 *p); extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch, struct xdr_buf *xdr); extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, -- cgit v1.2.3 From cafc739892f34b9090413179ca259409fc43bfae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:33 -0400 Subject: svcrdma: Use generic RDMA R/W API in RPC Call path The current svcrdma recvfrom code path has a lot of detail about registration mode and the type of port (iWARP, IB, etc). Instead, use the RDMA core's generic R/W API. This shares code with other RDMA-enabled ULPs that manages the gory details of buffer registration and the posting of RDMA Read Work Requests. Since the Read list marshaling code is being replaced, I took the opportunity to replace C structure-based XDR encoding code with more portable code that uses pointer arithmetic. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cf5d5412298b..b1ba19ba1071 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -82,10 +82,7 @@ struct svc_rdma_op_ctxt { int hdr_count; struct xdr_buf arg; struct ib_cqe cqe; - struct ib_cqe reg_cqe; - struct ib_cqe inv_cqe; u32 byte_len; - u32 position; struct svcxprt_rdma *xprt; unsigned long flags; enum dma_data_direction direction; @@ -116,7 +113,6 @@ struct svcxprt_rdma { struct list_head sc_accept_q; /* Conn. waiting accept */ int sc_ord; /* RDMA read limit */ int sc_max_sge; - int sc_max_sge_rd; /* max sge for read target */ bool sc_snd_w_inv; /* OK to use Send With Invalidate */ atomic_t sc_sq_avail; /* SQEs ready to be consumed */ @@ -141,10 +137,6 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - int (*sc_reader)(struct svcxprt_rdma *, - struct svc_rqst *, - struct svc_rdma_op_ctxt *, - int *, u32 *, u32, u32, u64, bool); u32 sc_dev_caps; /* distilled device caps */ unsigned int sc_frmr_pg_list_len; struct list_head sc_frmr_q; @@ -187,12 +179,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); -extern int rdma_read_chunk_lcl(struct svcxprt_rdma *, struct svc_rqst *, - struct svc_rdma_op_ctxt *, int *, u32 *, - u32, u32, u64, bool); -extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, - struct svc_rdma_op_ctxt *, int *, u32 *, - u32, u32, u64, bool); /* svc_rdma_rw.c */ extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); -- cgit v1.2.3 From c84dc900d737a8d8f08768622226980ee863403b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:49 -0400 Subject: svcrdma: Remove unused Read completion handlers Clean up: The generic RDMA R/W API conversion of svc_rdma_recvfrom replaced the Register, Read, and Invalidate completion handlers. Remove the old ones, which are no longer used. These handlers shared some helper code with svc_rdma_wc_send. Fold the wc_common helper back into the one remaining completion handler. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b1ba19ba1071..06d58a3f74bc 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -77,17 +77,15 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct list_head list; - struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; - int hdr_count; struct xdr_buf arg; struct ib_cqe cqe; u32 byte_len; struct svcxprt_rdma *xprt; - unsigned long flags; enum dma_data_direction direction; int count; unsigned int mapped_sges; + int hdr_count; struct ib_send_wr send_wr; struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; struct page *pages[RPCSVC_MAXPAGES]; -- cgit v1.2.3 From 463e63d7014442002399903af027b63ae38f6e77 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:18:57 -0400 Subject: svcrdma: Remove frmr cache Clean up: Now that the svc_rdma_recvfrom path uses the rdma_rw API, the details of Read sink buffer registration are dealt with by the kernel's RDMA core. This cache is no longer used, and can be removed. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 06d58a3f74bc..fd7775f70bb5 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -77,7 +77,6 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct list_head list; - struct svc_rdma_fastreg_mr *frmr; struct xdr_buf arg; struct ib_cqe cqe; u32 byte_len; @@ -91,17 +90,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; }; -struct svc_rdma_fastreg_mr { - struct ib_mr *mr; - struct scatterlist *sg; - int sg_nents; - unsigned long access_flags; - enum dma_data_direction direction; - struct list_head frmr_list; -}; - -#define RDMACTXT_F_LAST_CTXT 2 - #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ #define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ @@ -136,9 +124,6 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; u32 sc_dev_caps; /* distilled device caps */ - unsigned int sc_frmr_pg_list_len; - struct list_head sc_frmr_q; - spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ @@ -210,9 +195,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); -extern void svc_rdma_put_frmr(struct svcxprt_rdma *, - struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); -- cgit v1.2.3 From 9450ca8e2febb0000a5efd4f5870915d59ae62bc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 23 Jun 2017 17:19:13 -0400 Subject: svcrdma: Clean up after converting svc_rdma_recvfrom to rdma_rw API Clean up: Registration mode details are now handled by the rdma_rw API, and thus can be removed from svcrdma. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index fd7775f70bb5..995c6fe9ee90 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -90,9 +90,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; }; -#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ -#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ - struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ @@ -123,7 +120,6 @@ struct svcxprt_rdma { struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - u32 sc_dev_caps; /* distilled device caps */ spinlock_t sc_lock; /* transport lock */ -- cgit v1.2.3 From 949c033694864082db9b3f5304723a6d7407f8e2 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Tue, 11 Jul 2017 00:22:33 +0300 Subject: KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case of KVM_S390_GET_CMMA_BITS, the kernel does not only read struct kvm_s390_cmma_log passed from userspace (which constitutes _IOC_WRITE), it also writes back a return value (which constitutes _IOC_READ) making this an _IOWR ioctl instead of _IOW. Fixes: 4036e387 ("KVM: s390: ioctls to get and set guest storage attributes") Signed-off-by: Gleb Fotengauer-Malinovskiy Acked-by: Christian Borntraeger Signed-off-by: Radim Krčmář --- include/uapi/linux/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c0b6dfec5f87..ebd604c222d8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1351,7 +1351,7 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_X86_SMM */ #define KVM_SMI _IO(KVMIO, 0xb7) /* Available with KVM_CAP_S390_CMMA_MIGRATION */ -#define KVM_S390_GET_CMMA_BITS _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log) +#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 40bf6a35483ee25271ce2a90d8976cf1409a033a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Jul 2017 10:23:13 +0200 Subject: rtc: Remove wrong deprecation comment rtc_time_to_tm and rtc_tm_to_time are not deprecated and make perfect sense for RTCs that are simple 32bit counters. Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index d53ecdc060cf..0a0f0d14a5fb 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -33,17 +33,11 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) return rtc_tm_to_time64(lhs) - rtc_tm_to_time64(rhs); } -/** - * Deprecated. Use rtc_time64_to_tm(). - */ static inline void rtc_time_to_tm(unsigned long time, struct rtc_time *tm) { rtc_time64_to_tm(time, tm); } -/** - * Deprecated. Use rtc_tm_to_time64(). - */ static inline int rtc_tm_to_time(struct rtc_time *tm, unsigned long *time) { *time = rtc_tm_to_time64(tm); -- cgit v1.2.3 From 771edcafa753aad304babd3bab8e413574d5db3b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 Jul 2017 09:29:06 -0700 Subject: socket: add documentation for missing elements Fill in missing kernel-doc for missing elements in struct sock. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8c85791fc196..f69c8c2782df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -246,6 +246,7 @@ struct sock_common { * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed + * @sk_tsq_flags: TCP Small Queues flags * @sk_write_queue: Packet sending queue * @sk_omem_alloc: "o" is "option" or "other" * @sk_wmem_queued: persistent queue size @@ -257,6 +258,7 @@ struct sock_common { * @sk_pacing_status: Pacing status (requested, handled by sch_fq) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @__sk_flags_offset: empty field used to determine location of bitfield * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets @@ -277,6 +279,7 @@ struct sock_common { * @sk_drops: raw/udp drops counter * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() + * @sk_uid: user id of owner * @sk_priority: %SO_PRIORITY setting * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family -- cgit v1.2.3 From 0a2c13d9cd76c84f2520f573ff83f777eb7464aa Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 12 Jul 2017 14:33:01 -0700 Subject: include/linux/dcache.h: use unsigned chars in struct name_snapshot "kernel.h: handle pointers to arrays better in container_of()" triggers: In file included from include/uapi/linux/stddef.h:1:0, from include/linux/stddef.h:4, from include/uapi/linux/posix_types.h:4, from include/uapi/linux/types.h:13, from include/linux/types.h:5, from include/linux/syscalls.h:71, from fs/dcache.c:17: fs/dcache.c: In function 'release_dentry_name_snapshot': include/linux/compiler.h:542:38: error: call to '__compiletime_assert_305' declared with attribute error: pointer type mismatch in container_of() _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ include/linux/compiler.h:525:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ include/linux/compiler.h:542:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) ^ include/linux/build_bug.h:46:37: note: in expansion of macro 'compiletime_assert' #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^ include/linux/kernel.h:860:2: note: in expansion of macro 'BUILD_BUG_ON_MSG' BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ ^ fs/dcache.c:305:7: note: in expansion of macro 'container_of' p = container_of(name->name, struct external_name, name[0]); Switch name_snapshot to use unsigned chars, matching struct qstr and struct external_name. Link: http://lkml.kernel.org/r/20170710152134.0f78c1e6@canb.auug.org.au Signed-off-by: Stephen Rothwell Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 025727bf6797..c706eaac692e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -592,8 +592,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) } struct name_snapshot { - const char *name; - char inline_name[DNAME_INLINE_LEN]; + const unsigned char *name; + unsigned char inline_name[DNAME_INLINE_LEN]; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); -- cgit v1.2.3 From c7acec713d14c6ce8a20154f9dfda258d6bcad3b Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 12 Jul 2017 14:33:04 -0700 Subject: kernel.h: handle pointers to arrays better in container_of() If the first parameter of container_of() is a pointer to a non-const-qualified array type (and the third parameter names a non-const-qualified array member), the local variable __mptr will be defined with a const-qualified array type. In ISO C, these types are incompatible. They work as expected in GNU C, but some versions will issue warnings. For example, GCC 4.9 produces the warning "initialization from incompatible pointer type". Here is an example of where the problem occurs: ------------------------------------------------------- #include #include MODULE_LICENSE("GPL"); struct st { int a; char b[16]; }; static int __init example_init(void) { struct st t = { .a = 101, .b = "hello" }; char (*p)[16] = &t.b; struct st *x = container_of(p, struct st, b); printk(KERN_DEBUG "%p %p\n", (void *)&t, (void *)x); return 0; } static void __exit example_exit(void) { } module_init(example_init); module_exit(example_exit); ------------------------------------------------------- Building the module with gcc-4.9 results in these warnings (where '{m}' is the module source and '{k}' is the kernel source): ------------------------------------------------------- In file included from {m}/example.c:1:0: {m}/example.c: In function `example_init': {k}/include/linux/kernel.h:854:48: warning: initialization from incompatible pointer type const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ {m}/example.c:14:17: note: in expansion of macro `container_of' struct st *x = container_of(p, struct st, b); ^ {k}/include/linux/kernel.h:854:48: warning: (near initialization for `x') const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ {m}/example.c:14:17: note: in expansion of macro `container_of' struct st *x = container_of(p, struct st, b); ^ ------------------------------------------------------- Replace the type checking performed by the macro to avoid these warnings. Make sure `*(ptr)` either has type compatible with the member, or has type compatible with `void`, ignoring qualifiers. Raise compiler errors if this is not true. This is stronger than the previous behaviour, which only resulted in compiler warnings for a type mismatch. [arnd@arndb.de: fix new warnings for container_of()] Link: http://lkml.kernel.org/r/20170620200940.90557-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170525120316.24473-7-abbotti@mev.co.uk Signed-off-by: Ian Abbott Signed-off-by: Arnd Bergmann Acked-by: Michal Nazarewicz Acked-by: Kees Cook Cc: Hidehiro Kawai Cc: Borislav Petkov Cc: Rasmus Villemoes Cc: Johannes Berg Cc: Peter Zijlstra Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1c91f26e2996..bd6d96cf80b1 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -854,9 +855,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @member: the name of the member within the struct. * */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) +#define container_of(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type *)(__mptr - offsetof(type, member))); }) /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD -- cgit v1.2.3 From 203e9e41219b4e7357104e525e91ac609fba2c6c Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:14 -0700 Subject: kexec: move vmcoreinfo out of the kernel's .bss section As Eric said, "what we need to do is move the variable vmcoreinfo_note out of the kernel's .bss section. And modify the code to regenerate and keep this information in something like the control page. Definitely something like this needs a page all to itself, and ideally far away from any other kernel data structures. I clearly was not watching closely the data someone decided to keep this silly thing in the kernel's .bss section." This patch allocates extra pages for these vmcoreinfo_XXX variables, one advantage is that it enhances some safety of vmcoreinfo, because vmcoreinfo now is kept far away from other kernel data structures. Link: http://lkml.kernel.org/r/1493281021-20737-1-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Tested-by: Michael Holzheu Reviewed-by: Juergen Gross Suggested-by: Eric Biederman Cc: Benjamin Herrenschmidt Cc: Dave Young Cc: Hari Bathini Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 4090a42578a8..87506a02e914 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -19,7 +19,7 @@ CRASH_CORE_NOTE_NAME_BYTES + \ CRASH_CORE_NOTE_DESC_BYTES) -#define VMCOREINFO_BYTES (4096) +#define VMCOREINFO_BYTES PAGE_SIZE #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) #define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ @@ -56,7 +56,7 @@ phys_addr_t paddr_vmcoreinfo_note(void); #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +extern u32 *vmcoreinfo_note; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; -- cgit v1.2.3 From 5203f4995d9a87952a83c2ce7866adbbe8f97bb5 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:17 -0700 Subject: powerpc/fadump: use the correct VMCOREINFO_NOTE_SIZE for phdr vmcoreinfo_max_size stands for the vmcoreinfo_data, the correct one we should use is vmcoreinfo_note whose total size is VMCOREINFO_NOTE_SIZE. Like explained in commit 77019967f06b ("kdump: fix exported size of vmcoreinfo note"), it should not affect the actual function, but we better fix it, also this change should be safe and backward compatible. After this, we can get rid of variable vmcoreinfo_max_size, let's use the corresponding macros directly, fewer variables means more safety for vmcoreinfo operation. [xlpang@redhat.com: fix build warning] Link: http://lkml.kernel.org/r/1494830606-27736-1-git-send-email-xlpang@redhat.com Link: http://lkml.kernel.org/r/1493281021-20737-2-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Reviewed-by: Mahesh Salgaonkar Reviewed-by: Dave Young Cc: Hari Bathini Cc: Benjamin Herrenschmidt Cc: Eric Biederman Cc: Juergen Gross Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 87506a02e914..e5df1b3cf072 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -58,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void); extern u32 *vmcoreinfo_note; extern size_t vmcoreinfo_size; -extern size_t vmcoreinfo_max_size; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); -- cgit v1.2.3 From 1229384f5b856d83698c38f9dedfd836e26711cb Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 12 Jul 2017 14:33:21 -0700 Subject: kdump: protect vmcoreinfo data under the crash memory Currently vmcoreinfo data is updated at boot time subsys_initcall(), it has the risk of being modified by some wrong code during system is running. As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on, when using "crash", "makedumpfile", etc utility to parse this vmcore, we probably will get "Segmentation fault" or other unexpected errors. E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the system; 3) trigger kdump, then we obviously will fail to recognize the crash context correctly due to the corrupted vmcoreinfo. Now except for vmcoreinfo, all the crash data is well protected(including the cpu note which is fully updated in the crash path, thus its correctness is guaranteed). Given that vmcoreinfo data is a large chunk prepared for kdump, we better protect it as well. To solve this, we relocate and copy vmcoreinfo_data to the crash memory when kdump is loading via kexec syscalls. Because the whole crash memory will be protected by existing arch_kexec_protect_crashkres() mechanism, we naturally protect vmcoreinfo_data from write(even read) access under kernel direct mapping after kdump is loaded. Since kdump is usually loaded at the very early stage after boot, we can trust the correctness of the vmcoreinfo data copied. On the other hand, we still need to operate the vmcoreinfo safe copy when crash happens to generate vmcoreinfo_note again, we rely on vmap() to map out a new kernel virtual address and update to use this new one instead in the following crash_save_vmcoreinfo(). BTW, we do not touch vmcoreinfo_note, because it will be fully updated using the protected vmcoreinfo_data after crash which is surely correct just like the cpu crash note. Link: http://lkml.kernel.org/r/1493281021-20737-3-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang Tested-by: Michael Holzheu Cc: Benjamin Herrenschmidt Cc: Dave Young Cc: Eric Biederman Cc: Hari Bathini Cc: Juergen Gross Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 2 +- include/linux/kexec.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index e5df1b3cf072..2df2118fbe13 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -28,6 +28,7 @@ typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +void crash_update_vmcoreinfo_safecopy(void *ptr); void crash_save_vmcoreinfo(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) @@ -57,7 +58,6 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("CONFIG_%s=y\n", #name) extern u32 *vmcoreinfo_note; -extern size_t vmcoreinfo_size; Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 65888418fb69..dd056fab9e35 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -172,6 +172,7 @@ struct kimage { unsigned long start; struct page *control_code_page; struct page *swap_page; + void *vmcoreinfo_data_copy; /* locates in the crash memory */ unsigned long nr_segments; struct kexec_segment segment[KEXEC_SEGMENT_MAX]; @@ -241,6 +242,7 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -- cgit v1.2.3 From 61d9b56a89208d8cccd0b4cfec7e6959717e16e3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Jul 2017 14:33:40 -0700 Subject: sysctl: add unsigned int range support To keep parity with regular int interfaces provide the an unsigned int proc_douintvec_minmax() which allows you to specify a range of allowed valid numbers. Adding proc_douintvec_minmax_sysadmin() is easy but we can wait for an actual user for that. Link: http://lkml.kernel.org/r/20170519033554.18592-6-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Acked-by: Kees Cook Cc: Subash Abhinov Kasiviswanathan Cc: Heinrich Schuchardt Cc: Kees Cook Cc: "David S. Miller" Cc: Ingo Molnar Cc: Al Viro Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..225001d437ae 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -47,6 +47,9 @@ extern int proc_douintvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int proc_douintvec_minmax(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, -- cgit v1.2.3 From 0791e3644e5ef21646fe565b9061788d05ec71d4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 12 Jul 2017 14:34:28 -0700 Subject: kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files With current epoll architecture target files are addressed with file_struct and file descriptor number, where the last is not unique. Moreover files can be transferred from another process via unix socket, added into queue and closed then so we won't find this descriptor in the task fdinfo list. Thus to checkpoint and restore such processes CRIU needs to find out where exactly the target file is present to add it into epoll queue. For this sake one can use kcmp call where some particular target file from the queue is compared with arbitrary file passed as an argument. Because epoll target files can have same file descriptor number but different file_struct a caller should explicitly specify the offset within. To test if some particular file is matching entry inside epoll one have to - fill kcmp_epoll_slot structure with epoll file descriptor, target file number and target file offset (in case if only one target is present then it should be 0) - call kcmp as kcmp(pid1, pid2, KCMP_EPOLL_TFD, fd, &kcmp_epoll_slot) - the kernel fetch file pointer matching file descriptor @fd of pid1 - lookups for file struct in epoll queue of pid2 and returns traditional 0,1,2 result for sorting purpose Link: http://lkml.kernel.org/r/20170424154423.511592110@gmail.com Signed-off-by: Cyrill Gorcunov Acked-by: Andrey Vagin Cc: Al Viro Cc: Pavel Emelyanov Cc: Michael Kerrisk Cc: Jason Baron Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventpoll.h | 3 +++ include/uapi/linux/kcmp.h | 10 ++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6daf6d4971f6..d8625d214ea7 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -14,6 +14,7 @@ #define _LINUX_EVENTPOLL_H #include +#include /* Forward declarations to avoid compiler errors */ @@ -22,6 +23,8 @@ struct file; #ifdef CONFIG_EPOLL +struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); + /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) { diff --git a/include/uapi/linux/kcmp.h b/include/uapi/linux/kcmp.h index 84df14b37360..481e103da78e 100644 --- a/include/uapi/linux/kcmp.h +++ b/include/uapi/linux/kcmp.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_KCMP_H #define _UAPI_LINUX_KCMP_H +#include + /* Comparison type */ enum kcmp_type { KCMP_FILE, @@ -10,8 +12,16 @@ enum kcmp_type { KCMP_SIGHAND, KCMP_IO, KCMP_SYSVSEM, + KCMP_EPOLL_TFD, KCMP_TYPES, }; +/* Slot for KCMP_EPOLL_TFD */ +struct kcmp_epoll_slot { + __u32 efd; /* epoll file descriptor */ + __u32 tfd; /* target file number */ + __u32 toff; /* target offset within same numbered sequence */ +}; + #endif /* _UAPI_LINUX_KCMP_H */ -- cgit v1.2.3 From 92ef6da3d06ff551a86de41ae37df9cc4b58d7a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 12 Jul 2017 14:34:31 -0700 Subject: kcmp: fs/epoll: wrap kcmp code with CONFIG_CHECKPOINT_RESTORE kcmp syscall is build iif CONFIG_CHECKPOINT_RESTORE is selected, so wrap appropriate helpers in epoll code with the config to build it conditionally. Link: http://lkml.kernel.org/r/20170513083456.GG1881@uranus.lan Signed-off-by: Cyrill Gorcunov Reported-by: Andrew Morton Cc: Andrey Vagin Cc: Al Viro Cc: Pavel Emelyanov Cc: Michael Kerrisk Cc: Jason Baron Cc: Andy Lutomirski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/eventpoll.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index d8625d214ea7..2f14ac73d01d 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -23,7 +23,9 @@ struct file; #ifdef CONFIG_EPOLL +#ifdef CONFIG_CHECKPOINT_RESTORE struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); +#endif /* Used to initialize the epoll bits inside the "struct file" */ static inline void eventpoll_init_file(struct file *file) -- cgit v1.2.3 From e41d58185f1444368873d4d7422f7664a68be61d Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 12 Jul 2017 14:34:35 -0700 Subject: fault-inject: support systematic fault injection Add /proc/self/task//fail-nth file that allows failing 0-th, 1-st, 2-nd and so on calls systematically. Excerpt from the added documentation: "Write to this file of integer N makes N-th call in the current task fail (N is 0-based). Read from this file returns a single char 'Y' or 'N' that says if the fault setup with a previous write to this file was injected or not, and disables the fault if it wasn't yet injected. Note that this file enables all types of faults (slab, futex, etc). This setting takes precedence over all other generic settings like probability, interval, times, etc. But per-capability settings (e.g. fail_futex/ignore-private) take precedence over it. This feature is intended for systematic testing of faults in a single system call. See an example below" Why add a new setting: 1. Existing settings are global rather than per-task. So parallel testing is not possible. 2. attr->interval is close but it depends on attr->count which is non reset to 0, so interval does not work as expected. 3. Trying to model this with existing settings requires manipulations of all of probability, interval, times, space, task-filter and unexposed count and per-task make-it-fail files. 4. Existing settings are per-failure-type, and the set of failure types is potentially expanding. 5. make-it-fail can't be changed by unprivileged user and aggressive stress testing better be done from an unprivileged user. Similarly, this would require opening the debugfs files to the unprivileged user, as he would need to reopen at least times file (not possible to pre-open before dropping privs). The proposed interface solves all of the above (see the example). We want to integrate this into syzkaller fuzzer. A prototype has found 10 bugs in kernel in first day of usage: https://groups.google.com/forum/#!searchin/syzkaller/%22FAULT_INJECTION%22%7Csort:relevance I've made the current interface work with all types of our sandboxes. For setuid the secret sauce was prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) to make /proc entries non-root owned. So I am fine with the current version of the code. [akpm@linux-foundation.org: fix build] Link: http://lkml.kernel.org/r/20170328130128.101773-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Akinobu Mita Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 20814b7d7d70..3822d749fc9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -974,6 +974,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; + int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call -- cgit v1.2.3 From 1a23395672658969a4035dcc518ea6cab835c579 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:38 -0700 Subject: ipc/sem.c: remove sem_base, embed struct sem sma->sem_base is initialized with sma->sem_base = (struct sem *) &sma[1]; The current code has four problems: - There is an unnecessary pointer dereference - sem_base is not needed. - Alignment for struct sem only works by chance. - The current code causes false positive for static code analysis. - This is a cast between different non-void types, which the future randstruct GCC plugin warns on. And, as bonus, the code size gets smaller: Before: 0 .text 00003770 After: 0 .text 0000374e [manfred@colorfullife.com: s/[0]/[]/, per hch] Link: http://lkml.kernel.org/r/20170525185107.12869-2-manfred@colorfullife.com Link: http://lkml.kernel.org/r/20170515171912.6298-2-manfred@colorfullife.com Signed-off-by: Manfred Spraul Acked-by: Kees Cook Cc: Kees Cook Cc: <1vier1@web.de> Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Fabian Frederick Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sem.h b/include/linux/sem.h index 9edec926e9d9..9db14093b73c 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -8,11 +8,29 @@ struct task_struct; +/* One semaphore structure for each semaphore in the system. */ +struct sem { + int semval; /* current value */ + /* + * PID of the process that last modified the semaphore. For + * Linux, specifically these are: + * - semop + * - semctl, via SETVAL and SETALL. + * - at task exit when performing undo adjustments (see exit_sem). + */ + int sempid; + spinlock_t lock; /* spinlock for fine-grained semtimedop */ + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; + /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ time_t sem_ctime; /* last change time */ - struct sem *sem_base; /* ptr to first semaphore in array */ struct list_head pending_alter; /* pending operations */ /* that alter the array */ struct list_head pending_const; /* pending complex operations */ @@ -21,6 +39,8 @@ struct sem_array { int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ unsigned int use_global_lock;/* >0: global lock required */ + + struct sem sems[]; }; #ifdef CONFIG_SYSVIPC -- cgit v1.2.3 From dba4cdd39e698d8dcdad0656825423052ac90ccd Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:41 -0700 Subject: ipc: merge ipc_rcu and kern_ipc_perm ipc has two management structures that exist for every id: - struct kern_ipc_perm, it contains e.g. the permissions. - struct ipc_rcu, it contains the rcu head for rcu handling and the refcount. The patch merges both structures. As a bonus, we may save one cacheline, because both structures are cacheline aligned. In addition, it reduces the number of casts, instead most codepaths can use container_of. To simplify code, the ipc_rcu_alloc initializes the allocation to 0. [manfred@colorfullife.com: really include the memset() into ipc_alloc_rcu()] Link: http://lkml.kernel.org/r/564f8612-0601-b267-514f-a9f650ec9b32@colorfullife.com Link: http://lkml.kernel.org/r/20170525185107.12869-3-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: Davidlohr Bueso Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 71fd92d81b26..5591f055e13f 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -20,6 +20,9 @@ struct kern_ipc_perm { umode_t mode; unsigned long seq; void *security; + + struct rcu_head rcu; + atomic_t refcount; } ____cacheline_aligned_in_smp; #endif /* _LINUX_IPC_H */ -- cgit v1.2.3 From 2cd648c110b5570c3280bd645797658cabbe5f5c Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Wed, 12 Jul 2017 14:34:44 -0700 Subject: include/linux/sem.h: correctly document sem_ctime sem_ctime is initialized to the semget() time and then updated at every semctl() that changes the array. Thus it does not represent the time of the last change. Especially, semop() calls are only stored in sem_otime, not in sem_ctime. This is already described in ipc/sem.c, I just overlooked that there is a comment in include/linux/sem.h and man semctl(2) as well. So: Correct wrong comments. Link: http://lkml.kernel.org/r/20170515171912.6298-4-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: Kees Cook Cc: <1vier1@web.de> Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sem.h | 2 +- include/uapi/linux/sem.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sem.h b/include/linux/sem.h index 9db14093b73c..be5cf2ea14ad 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -30,7 +30,7 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ - time_t sem_ctime; /* last change time */ + time_t sem_ctime; /* create/last semctl() time */ struct list_head pending_alter; /* pending operations */ /* that alter the array */ struct list_head pending_const; /* pending complex operations */ diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h index dd73b908b2f3..67eb90361692 100644 --- a/include/uapi/linux/sem.h +++ b/include/uapi/linux/sem.h @@ -23,7 +23,7 @@ struct semid_ds { struct ipc_perm sem_perm; /* permissions .. see ipc.h */ __kernel_time_t sem_otime; /* last semop time */ - __kernel_time_t sem_ctime; /* last change time */ + __kernel_time_t sem_ctime; /* create/last semctl() time */ struct sem *sem_base; /* ptr to first semaphore in array */ struct sem_queue *sem_pending; /* pending operations to be processed */ struct sem_queue **sem_pending_last; /* last pending operation */ -- cgit v1.2.3 From 24bb44612c5f93a1dff1f7e71b7b7b109a988791 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:40 -0700 Subject: kernel/watchdog: remove unused declaration Patch series "Improve watchdog config for arch watchdogs", v4. A series to make the hardlockup watchdog more easily replaceable by arch code. The last patch provides some justification for why we want to do this (existing sparc watchdog is another that could benefit). This patch (of 5): Remove unused declaration. Link: http://lkml.kernel.org/r/20170616065715.18390-2-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aa3cd0878270..5e2e57536d98 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -12,9 +12,6 @@ extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern unsigned int hardlockup_panic; void lockup_detector_init(void); -- cgit v1.2.3 From f2e0cff85ed111a3cf24d894c3fa11697dfae628 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:43 -0700 Subject: kernel/watchdog: introduce arch_touch_nmi_watchdog() For architectures that define HAVE_NMI_WATCHDOG, instead of having them provide the complete touch_nmi_watchdog() function, just have them provide arch_touch_nmi_watchdog(). This gives the generic code more flexibility in implementing this function, and arch implementations don't miss out on touching the softlockup watchdog or other generic details. Link: http://lkml.kernel.org/r/20170616065715.18390-3-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5e2e57536d98..bd387ef8bccd 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -6,6 +6,9 @@ #include #include +#if defined(CONFIG_HAVE_NMI_WATCHDOG) +#include +#endif #ifdef CONFIG_LOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); @@ -58,6 +61,18 @@ static inline void reset_hung_task_detector(void) #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +#if defined(CONFIG_HARDLOCKUP_DETECTOR) +extern void hardlockup_detector_disable(void); +#else +static inline void hardlockup_detector_disable(void) {} +#endif + +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +extern void arch_touch_nmi_watchdog(void); +#else +static inline void arch_touch_nmi_watchdog(void) {} +#endif + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * @@ -65,21 +80,11 @@ static inline void reset_hung_task_detector(void) * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -#include -extern void touch_nmi_watchdog(void); -#else static inline void touch_nmi_watchdog(void) { + arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } -#endif - -#if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void hardlockup_detector_disable(void); -#else -static inline void hardlockup_detector_disable(void) {} -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided -- cgit v1.2.3 From 05a4a95279311c3a4633b4277a5d21cfd616c6c7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 12 Jul 2017 14:35:46 -0700 Subject: kernel/watchdog: split up config options Split SOFTLOCKUP_DETECTOR from LOCKUP_DETECTOR, and split HARDLOCKUP_DETECTOR_PERF from HARDLOCKUP_DETECTOR. LOCKUP_DETECTOR implies the general boot, sysctl, and programming interfaces for the lockup detectors. An architecture that wants to use a hard lockup detector must define HAVE_HARDLOCKUP_DETECTOR_PERF or HAVE_HARDLOCKUP_DETECTOR_ARCH. Alternatively an arch can define HAVE_NMI_WATCHDOG, which provides the minimum arch_touch_nmi_watchdog, and it otherwise does its own thing and does not implement the LOCKUP_DETECTOR interfaces. sparc is unusual in that it has started to implement some of the interfaces, but not fully yet. It should probably be converted to a full HAVE_HARDLOCKUP_DETECTOR_ARCH. [npiggin@gmail.com: fix] Link: http://lkml.kernel.org/r/20170617223522.66c0ad88@roar.ozlabs.ibm.com Link: http://lkml.kernel.org/r/20170616065715.18390-4-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Don Zickus Reviewed-by: Babu Moger Tested-by: Babu Moger [sparc] Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index bd387ef8bccd..8aa01fd859fb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -11,13 +11,21 @@ #endif #ifdef CONFIG_LOCKUP_DETECTOR +void lockup_detector_init(void); +#else +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); +extern int soft_watchdog_enabled; +extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { @@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } -static inline void lockup_detector_init(void) -{ -} #endif #ifdef CONFIG_DETECT_HUNG_TASK @@ -63,15 +68,18 @@ static inline void reset_hung_task_detector(void) #if defined(CONFIG_HARDLOCKUP_DETECTOR) extern void hardlockup_detector_disable(void); +extern unsigned int hardlockup_panic; #else static inline void hardlockup_detector_disable(void) {} #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); #else +#if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif +#endif /** * touch_nmi_watchdog - restart NMI watchdog timeout. @@ -141,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu) } #endif -#ifdef CONFIG_LOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +#endif + +#ifdef CONFIG_LOCKUP_DETECTOR extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern atomic_t watchdog_park_in_progress; +extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; -- cgit v1.2.3 From 6974f0c4555e285ab217cee58b6e874f776ff409 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 12 Jul 2017 14:36:10 -0700 Subject: include/linux/string.h: add the option of fortified string.h functions This adds support for compiling with a rough equivalent to the glibc _FORTIFY_SOURCE=1 feature, providing compile-time and runtime buffer overflow checks for string.h functions when the compiler determines the size of the source or destination buffer at compile-time. Unlike glibc, it covers buffer reads in addition to writes. GNU C __builtin_*_chk intrinsics are avoided because they would force a much more complex implementation. They aren't designed to detect read overflows and offer no real benefit when using an implementation based on inline checks. Inline checks don't add up to much code size and allow full use of the regular string intrinsics while avoiding the need for a bunch of _chk functions and per-arch assembly to avoid wrapper overhead. This detects various overflows at compile-time in various drivers and some non-x86 core kernel code. There will likely be issues caught in regular use at runtime too. Future improvements left out of initial implementation for simplicity, as it's all quite optional and can be done incrementally: * Some of the fortified string functions (strncpy, strcat), don't yet place a limit on reads from the source based on __builtin_object_size of the source buffer. * Extending coverage to more string functions like strlcat. * It should be possible to optionally use __builtin_object_size(x, 1) for some functions (C strings) to detect intra-object overflows (like glibc's _FORTIFY_SOURCE=2), but for now this takes the conservative approach to avoid likely compatibility issues. * The compile-time checks should be made available via a separate config option which can be enabled by default (or always enabled) once enough time has passed to get the issues it catches fixed. Kees said: "This is great to have. While it was out-of-tree code, it would have blocked at least CVE-2016-3858 from being exploitable (improper size argument to strlcpy()). I've sent a number of fixes for out-of-bounds-reads that this detected upstream already" [arnd@arndb.de: x86: fix fortified memcpy] Link: http://lkml.kernel.org/r/20170627150047.660360-1-arnd@arndb.de [keescook@chromium.org: avoid panic() in favor of BUG()] Link: http://lkml.kernel.org/r/20170626235122.GA25261@beast [keescook@chromium.org: move from -mm, add ARCH_HAS_FORTIFY_SOURCE, tweak Kconfig help] Link: http://lkml.kernel.org/r/20170526095404.20439-1-danielmicay@gmail.com Link: http://lkml.kernel.org/r/1497903987-21002-8-git-send-email-keescook@chromium.org Signed-off-by: Daniel Micay Signed-off-by: Kees Cook Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Cc: Mark Rutland Cc: Daniel Axtens Cc: Rasmus Villemoes Cc: Andy Shevchenko Cc: Chris Metcalf Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 200 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 7439d83eaa33..96f5a5fd0377 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -193,4 +193,204 @@ static inline const char *kbasename(const char *path) return tail ? tail + 1 : path; } +#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __RENAME(x) __asm__(#x) + +void fortify_panic(const char *name) __noreturn __cold; +void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); +void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); +void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); + +#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) +__FORTIFY_INLINE char *strcpy(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_strncpy(p, q, size); +} + +__FORTIFY_INLINE char *strcat(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strcat(p, q); + if (strlcat(p, q, p_size) >= p_size) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +{ + __kernel_size_t ret; + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strlen(p); + ret = strnlen(p, p_size); + if (p_size <= ret) + fortify_panic(__func__); + return ret; +} + +extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +{ + size_t p_size = __builtin_object_size(p, 0); + __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) + fortify_panic(__func__); + return ret; +} + +/* defined after fortified strlen to reuse it */ +extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); +__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) +{ + size_t ret; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __real_strlcpy(p, q, size); + ret = strlen(q); + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + if (__builtin_constant_p(len) && len >= p_size) + __write_overflow(); + if (len >= p_size) + fortify_panic(__func__); + __builtin_memcpy(p, q, len); + p[len] = '\0'; + } + return ret; +} + +/* defined after fortified strlen and strnlen to reuse them */ +__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) +{ + size_t p_len, copy_len; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strncat(p, q, count); + p_len = strlen(p); + copy_len = strnlen(q, count); + if (p_size < p_len + copy_len + 1) + fortify_panic(__func__); + __builtin_memcpy(p + p_len, q, copy_len); + p[p_len + copy_len] = '\0'; + return p; +} + +__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memset(p, c, size); +} + +__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcpy(p, q, size); +} + +__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memmove(p, q, size); +} + +extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); +__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memscan(p, c, size); +} + +__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __read_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcmp(p, q, size); +} + +__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memchr(p, c, size); +} + +void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); +__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memchr_inv(p, c, size); +} + +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); +__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_kmemdup(p, size, gfp); +} +#endif + #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3 From 022c204040f3fd22d6445bc35517786195b7ae80 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 12 Jul 2017 14:36:17 -0700 Subject: random,stackprotect: introduce get_random_canary function Patch series "stackprotector: ascii armor the stack canary", v2. Zero out the first byte of the stack canary value on 64 bit systems, in order to mitigate unterminated C string overflows. The null byte both prevents C string functions from reading the canary, and from writing it if the canary value were guessed or obtained through some other means. Reducing the entropy by 8 bits is acceptable on 64-bit systems, which will still have 56 bits of entropy left, but not on 32 bit systems, so the "ascii armor" canary is only implemented on 64-bit systems. Inspired by the "ascii armor" code in execshield and Daniel Micay's linux-hardened tree. Also see https://github.com/thestinger/linux-hardened/ This patch (of 5): Introduce get_random_canary(), which provides a random unsigned long canary value with the first byte zeroed out on 64 bit architectures, in order to mitigate non-terminated C string overflows. The null byte both prevents C string functions from reading the canary, and from writing it if the canary value were guessed or obtained through some other means. Reducing the entropy by 8 bits is acceptable on 64-bit systems, which will still have 56 bits of entropy left, but not on 32 bit systems, so the "ascii armor" canary is only implemented on 64-bit systems. Inspired by the "ascii armor" code in the old execshield patches, and Daniel Micay's linux-hardened tree. Link: http://lkml.kernel.org/r/20170524155751.424-2-riel@redhat.com Signed-off-by: Rik van Riel Acked-by: Kees Cook Cc: Daniel Micay Cc: "Theodore Ts'o" Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Catalin Marinas Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/random.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index ed5c3838780d..1fa0dc880bd7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,6 +57,27 @@ static inline unsigned long get_random_long(void) #endif } +/* + * On 64-bit architectures, protect against non-terminated C string overflows + * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. + */ +#ifdef CONFIG_64BIT +# ifdef __LITTLE_ENDIAN +# define CANARY_MASK 0xffffffffffffff00UL +# else /* big endian, 64 bits: */ +# define CANARY_MASK 0x00ffffffffffffffUL +# endif +#else /* 32 bits: */ +# define CANARY_MASK 0xffffffffUL +#endif + +static inline unsigned long get_random_canary(void) +{ + unsigned long val = get_random_long(); + + return val & CANARY_MASK; +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); -- cgit v1.2.3 From dcda9b04713c3f6ff0875652924844fae28286ea Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:45 -0700 Subject: mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic __GFP_REPEAT was designed to allow retry-but-eventually-fail semantic to the page allocator. This has been true but only for allocations requests larger than PAGE_ALLOC_COSTLY_ORDER. It has been always ignored for smaller sizes. This is a bit unfortunate because there is no way to express the same semantic for those requests and they are considered too important to fail so they might end up looping in the page allocator for ever, similarly to GFP_NOFAIL requests. Now that the whole tree has been cleaned up and accidental or misled usage of __GFP_REPEAT flag has been removed for !costly requests we can give the original flag a better name and more importantly a more useful semantic. Let's rename it to __GFP_RETRY_MAYFAIL which tells the user that the allocator would try really hard but there is no promise of a success. This will work independent of the order and overrides the default allocator behavior. Page allocator users have several levels of guarantee vs. cost options (take GFP_KERNEL as an example) - GFP_KERNEL & ~__GFP_RECLAIM - optimistic allocation without _any_ attempt to free memory at all. The most light weight mode which even doesn't kick the background reclaim. Should be used carefully because it might deplete the memory and the next user might hit the more aggressive reclaim - GFP_KERNEL & ~__GFP_DIRECT_RECLAIM (or GFP_NOWAIT)- optimistic allocation without any attempt to free memory from the current context but can wake kswapd to reclaim memory if the zone is below the low watermark. Can be used from either atomic contexts or when the request is a performance optimization and there is another fallback for a slow path. - (GFP_KERNEL|__GFP_HIGH) & ~__GFP_DIRECT_RECLAIM (aka GFP_ATOMIC) - non sleeping allocation with an expensive fallback so it can access some portion of memory reserves. Usually used from interrupt/bh context with an expensive slow path fallback. - GFP_KERNEL - both background and direct reclaim are allowed and the _default_ page allocator behavior is used. That means that !costly allocation requests are basically nofail but there is no guarantee of that behavior so failures have to be checked properly by callers (e.g. OOM killer victim is allowed to fail currently). - GFP_KERNEL | __GFP_NORETRY - overrides the default allocator behavior and all allocation requests fail early rather than cause disruptive reclaim (one round of reclaim in this implementation). The OOM killer is not invoked. - GFP_KERNEL | __GFP_RETRY_MAYFAIL - overrides the default allocator behavior and all allocation requests try really hard. The request will fail if the reclaim cannot make any progress. The OOM killer won't be triggered. - GFP_KERNEL | __GFP_NOFAIL - overrides the default allocator behavior and all allocation requests will loop endlessly until they succeed. This might be really dangerous especially for larger orders. Existing users of __GFP_REPEAT are changed to __GFP_RETRY_MAYFAIL because they already had their semantic. No new users are added. __alloc_pages_slowpath is changed to bail out for __GFP_RETRY_MAYFAIL if there is no progress and we have already passed the OOM point. This means that all the reclaim opportunities have been exhausted except the most disruptive one (the OOM killer) and a user defined fallback behavior is more sensible than keep retrying in the page allocator. [akpm@linux-foundation.org: fix arch/sparc/kernel/mdesc.c] [mhocko@suse.com: semantic fix] Link: http://lkml.kernel.org/r/20170626123847.GM11534@dhcp22.suse.cz [mhocko@kernel.org: address other thing spotted by Vlastimil] Link: http://lkml.kernel.org/r/20170626124233.GN11534@dhcp22.suse.cz Link: http://lkml.kernel.org/r/20170623085345.11304-3-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Belits Cc: Chris Wilson Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 56 ++++++++++++++++++++++++++++++++---------- include/linux/slab.h | 3 ++- include/trace/events/mmflags.h | 2 +- 3 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4c6656f1fee7..bcfb9f7c46f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -25,7 +25,7 @@ struct vm_area_struct; #define ___GFP_FS 0x80u #define ___GFP_COLD 0x100u #define ___GFP_NOWARN 0x200u -#define ___GFP_REPEAT 0x400u +#define ___GFP_RETRY_MAYFAIL 0x400u #define ___GFP_NOFAIL 0x800u #define ___GFP_NORETRY 0x1000u #define ___GFP_MEMALLOC 0x2000u @@ -136,26 +136,56 @@ struct vm_area_struct; * * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * The default allocator behavior depends on the request size. We have a concept + * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules + * + * __GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with __GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure - * policy) but it is definitely preferable to use the flag rather than - * opencode endless loop around allocator. - * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) diff --git a/include/linux/slab.h b/include/linux/slab.h index 04a7f7993e67..41473df6dfb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %__GFP_NOWARN - If allocation fails, don't issue any warnings. * - * %__GFP_REPEAT - If allocation fails initially, try once more before failing. + * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail + * eventually. * * There are other flags available as well, but these are not intended * for general use, and so are not documented here. For a full list of diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 10e3663a75a6..8e50d01c645f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -34,7 +34,7 @@ {(unsigned long)__GFP_FS, "__GFP_FS"}, \ {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ -- cgit v1.2.3 From 0f55685627d6dd2beda55a82abc02297f0f8e5c2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 12 Jul 2017 14:36:58 -0700 Subject: mm, migration: do not trigger OOM killer when migrating memory Page migration (for memory hotplug, soft_offline_page or mbind) needs to allocate a new memory. This can trigger an oom killer if the target memory is depleated. Although quite unlikely, still possible, especially for the memory hotplug (offlining of memoery). Up to now we didn't really have reasonable means to back off. __GFP_NORETRY can fail just too easily and __GFP_THISNODE sticks to a single node and that is not suitable for all callers. But now that we have __GFP_RETRY_MAYFAIL we should use it. It is preferable to fail the migration than disrupt the system by killing some processes. Link: http://lkml.kernel.org/r/20170623085345.11304-7-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Alex Belits Cc: Chris Wilson Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: David Daney Cc: Johannes Weiner Cc: Mel Gorman Cc: NeilBrown Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4634da521238..3e0d405dc842 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES]; static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), -- cgit v1.2.3 From c945dccc80856107f109c36a7d0e29a371b5d1b5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Jul 2017 14:37:48 -0700 Subject: ARM: samsung: usb-ohci: move inline before return type Make the code like the rest of the kernel. Link: http://lkml.kernel.org/r/667a515b8d0f10f2465d519f8595edd91552fc5e.1499284835.git.joe@perches.com Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/platform_data/usb-ohci-s3c2410.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/usb-ohci-s3c2410.h b/include/linux/platform_data/usb-ohci-s3c2410.h index 7fa1fbefc3f2..cc7554ae6e8b 100644 --- a/include/linux/platform_data/usb-ohci-s3c2410.h +++ b/include/linux/platform_data/usb-ohci-s3c2410.h @@ -31,7 +31,7 @@ struct s3c2410_hcd_info { void (*report_oc)(struct s3c2410_hcd_info *, int ports); }; -static void inline s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports) +static inline void s3c2410_usb_report_oc(struct s3c2410_hcd_info *info, int ports) { if (info->report_oc != NULL) { (info->report_oc)(info, ports); -- cgit v1.2.3 From 3e8f399da490e6ac20a3cfd6aa404c9aa961a9a2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 12 Jul 2017 14:37:51 -0700 Subject: writeback: rework wb_[dec|inc]_stat family of functions Currently the writeback statistics code uses a percpu counters to hold various statistics. Furthermore we have 2 families of functions - those which disable local irq and those which doesn't and whose names begin with double underscore. However, they both end up calling __add_wb_stats which in turn calls percpu_counter_add_batch which is already irq-safe. Exploiting this fact allows to eliminated the __wb_* functions since they don't add any further protection than we already have. Furthermore, refactor the wb_* function to call __add_wb_stat directly without the irq-disabling dance. This will likely result in better runtime of code which deals with modifying the stat counters. While at it also document why percpu_counter_add_batch is in fact preempt and irq-safe since at least 3 people got confused. Link: http://lkml.kernel.org/r/1498029937-27293-1-git-send-email-nborisov@suse.com Signed-off-by: Nikolay Borisov Acked-by: Tejun Heo Reviewed-by: Jan Kara Cc: Josef Bacik Cc: Mel Gorman Cc: Jeff Layton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 334165c911f0..854e1bdd0b2a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void __inc_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, 1); -} - static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __inc_wb_stat(wb, item); - local_irq_restore(flags); -} - -static inline void __dec_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, -1); + __add_wb_stat(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __dec_wb_stat(wb, item); - local_irq_restore(flags); + __add_wb_stat(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) -- cgit v1.2.3 From 3c48d86cc959309ee168fb87737a8cb3f97c5224 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 12 Jul 2017 15:04:16 -0700 Subject: clk: Provide bulk prepare_enable disable_unprepare variants This extends the existing set of bulk helpers with prepare_enable and disable_unprepare variants. Cc: Russell King , Cc: Dong Aisheng Signed-off-by: Bjorn Andersson Signed-off-by: Stephen Boyd --- include/linux/clk.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index c673f0b91751..690e6a6921e1 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -657,6 +657,28 @@ static inline void clk_disable_unprepare(struct clk *clk) clk_unprepare(clk); } +static inline int clk_bulk_prepare_enable(int num_clks, + struct clk_bulk_data *clks) +{ + int ret; + + ret = clk_bulk_prepare(num_clks, clks); + if (ret) + return ret; + ret = clk_bulk_enable(num_clks, clks); + if (ret) + clk_bulk_unprepare(num_clks, clks); + + return ret; +} + +static inline void clk_bulk_disable_unprepare(int num_clks, + struct clk_bulk_data *clks) +{ + clk_bulk_disable(num_clks, clks); + clk_bulk_unprepare(num_clks, clks); +} + #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK) struct clk *of_clk_get(struct device_node *np, int index); struct clk *of_clk_get_by_name(struct device_node *np, const char *name); -- cgit v1.2.3 From efc479e6900c22bad9a2b649d13405ed9cde2d53 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 22 Jun 2017 16:51:01 +0300 Subject: kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a flaw in the Hyper-V SynIC implementation in KVM: when message page or event flags page is enabled by setting the corresponding msr, KVM zeroes it out. This is problematic because on migration the corresponding MSRs are loaded on the destination, so the content of those pages is lost. This went unnoticed so far because the only user of those pages was in-KVM hyperv synic timers, which could continue working despite that zeroing. Newer QEMU uses those pages for Hyper-V VMBus implementation, and zeroing them breaks the migration. Besides, in newer QEMU the content of those pages is fully managed by QEMU, so zeroing them is undesirable even when writing the MSRs from the guest side. To support this new scheme, introduce a new capability, KVM_CAP_HYPERV_SYNIC2, which, when enabled, makes sure that the synic pages aren't zeroed out in KVM. Signed-off-by: Roman Kagan Signed-off-by: Radim Krčmář --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ebd604c222d8..38b2cfbc8112 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -927,6 +927,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_CMMA_MIGRATION 145 #define KVM_CAP_PPC_FWNMI 146 #define KVM_CAP_PPC_SMT_POSSIBLE 147 +#define KVM_CAP_HYPERV_SYNIC2 148 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 0aebdc52ca824c38837a652548028e45da72628f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 09:31:19 +0200 Subject: sunrpc: properly type argument to kxdreproc_t Pass struct rpc_request as the first argument instead of an untyped blob, and mark the data object as const. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton --- include/linux/sunrpc/xdr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 054c8cde18f3..290f189de200 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -17,6 +17,8 @@ #include #include +struct rpc_rqst; + /* * Buffer adjustment */ @@ -222,7 +224,8 @@ struct xdr_stream { /* * These are the xdr_stream style generic XDR encode and decode functions. */ -typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj); typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); -- cgit v1.2.3 From 993328e2b31fedc35276a4828039ad7af6d519cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 14:58:11 +0200 Subject: sunrpc: properly type argument to kxdrdproc_t Pass struct rpc_request as the first argument instead of an untyped blob. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Layton Acked-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 290f189de200..ed0fbf0d8d0f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -226,7 +226,8 @@ struct xdr_stream { */ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, const void *obj); -typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); +typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj); extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); -- cgit v1.2.3 From c551858a884b6d81def3d1528a9002ba97f5d4ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:27:10 +0200 Subject: sunrpc: move p_count out of struct rpc_procinfo p_count is the only writeable memeber of struct rpc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it into out out struct rpc_procinfo, and into a separate writable array that is pointed to by struct rpc_version and indexed by p_statidx. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/clnt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6095ecba0dde..c75ba37151fe 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -88,6 +88,7 @@ struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ struct rpc_procinfo * procs; /* procedure array */ + unsigned int *counts; /* call counts */ }; /* @@ -99,7 +100,6 @@ struct rpc_procinfo { kxdrdproc_t p_decode; /* XDR decode function */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ - unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ const char * p_name; /* name of procedure */ -- cgit v1.2.3 From 511e936bf2b3e8be2a3160ace3d86be07962a7a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 15:36:49 +0200 Subject: sunrpc: mark all struct rpc_procinfo instances as const struct rpc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attach vector for code injections. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++-- include/linux/sunrpc/sched.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c75ba37151fe..55ef67bea06b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -39,7 +39,7 @@ struct rpc_clnt { struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ - struct rpc_procinfo * cl_procinfo; /* procedure info */ + const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ @@ -87,7 +87,7 @@ struct rpc_program { struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ - struct rpc_procinfo * procs; /* procedure array */ + const struct rpc_procinfo *procs; /* procedure array */ unsigned int *counts; /* call counts */ }; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 7ba040c797ec..ed60253abd0a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -22,7 +22,7 @@ */ struct rpc_procinfo; struct rpc_message { - struct rpc_procinfo * rpc_proc; /* Procedure information */ + const struct rpc_procinfo *rpc_proc; /* Procedure information */ void * rpc_argp; /* Arguments */ void * rpc_resp; /* Result */ struct rpc_cred * rpc_cred; /* Credentials */ -- cgit v1.2.3 From 1c8a5409f3c4748ff42c1721d9578dd03091f378 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 17:35:49 +0200 Subject: sunrpc: properly type pc_func callbacks Drop the argp and resp arguments as they can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to svc_procfunc as well as the svc_procfunc typedef itself. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 11cef5a7bc87..3b58c55614c7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -419,9 +419,9 @@ struct svc_version { /* * RPC procedure info */ -typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { - svc_procfunc pc_func; /* process the request */ + /* process the request: */ + __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ kxdrproc_t pc_release; /* XDR free result */ -- cgit v1.2.3 From 1150ded804c2be6d02efef3e39e39e570c9cea21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 18:48:24 +0200 Subject: sunrpc: properly type pc_release callbacks Drop the p and resp arguments as they are always NULL or can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3b58c55614c7..c73194e9c2bd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -424,7 +424,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); kxdrproc_t pc_decode; /* XDR decode args */ kxdrproc_t pc_encode; /* XDR encode result */ - kxdrproc_t pc_release; /* XDR free result */ + /* XDR free result: */ + void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ unsigned int pc_count; /* call count */ -- cgit v1.2.3 From cc6acc20a606f9ca65a6338af8204b7ae9e67b1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:01:48 +0200 Subject: sunrpc: properly type pc_decode callbacks Drop the argp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig --- include/linux/lockd/xdr.h | 18 +++++++++--------- include/linux/lockd/xdr4.h | 18 +++++++++--------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index d39ed1cc5fbf..0416600844ce 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -95,19 +95,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *); int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index e58c88b52ce1..951bbe31fdb8 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -23,19 +23,19 @@ -int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *); int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *); -int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); -int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); -int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c73194e9c2bd..d8703a5ab81e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -422,7 +422,8 @@ struct svc_version { struct svc_procedure { /* process the request: */ __be32 (*pc_func)(struct svc_rqst *); - kxdrproc_t pc_decode; /* XDR decode args */ + /* XDR decode args: */ + int (*pc_decode)(struct svc_rqst *, __be32 *data); kxdrproc_t pc_encode; /* XDR encode result */ /* XDR free result: */ void (*pc_release)(struct svc_rqst *); -- cgit v1.2.3 From d16d1867215663907f3212590d1a9d32398a0f47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:42:02 +0200 Subject: sunrpc: properly type pc_encode callbacks Drop the resp argument as it can trivially be derived from the rqstp argument. With that all functions now have the same prototype, and we can remove the unsafe casting to kxdrproc_t. Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/lockd/xdr.h | 8 ++++---- include/linux/lockd/xdr4.h | 8 ++++---- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 0416600844ce..7acbecc21a40 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -96,16 +96,16 @@ struct nlm_reboot { #define NLMSVC_XDRSIZE sizeof(struct nlm_args) int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *); int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *); int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_res(struct svc_rqst *, __be32 *); int nlmsvc_decode_res(struct svc_rqst *, __be32 *); -int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_void(struct svc_rqst *, __be32 *); int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *); int nlmsvc_decode_notify(struct svc_rqst *, __be32 *); int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 951bbe31fdb8..bf1645609225 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -24,16 +24,16 @@ int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *); int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *); int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *); int nlm4svc_decode_res(struct svc_rqst *, __be32 *); -int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_void(struct svc_rqst *, __be32 *); int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *); -int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *); int nlm4svc_decode_notify(struct svc_rqst *, __be32 *); int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *); /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d8703a5ab81e..bd9e313c444a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -424,7 +424,8 @@ struct svc_procedure { __be32 (*pc_func)(struct svc_rqst *); /* XDR decode args: */ int (*pc_decode)(struct svc_rqst *, __be32 *data); - kxdrproc_t pc_encode; /* XDR encode result */ + /* XDR encode result: */ + int (*pc_encode)(struct svc_rqst *, __be32 *data); /* XDR free result: */ void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ -- cgit v1.2.3 From 408b3d46ae06e1d219f31cbe629789a5e5c862aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 19:56:10 +0200 Subject: sunrpc: remove kxdrproc_t Remove the now unused typedef. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/xdr.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index ed0fbf0d8d0f..261b48a2701d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -34,13 +34,6 @@ struct xdr_netobj { u8 * data; }; -/* - * This is the legacy generic XDR function. rqstp is either a rpc_rqst - * (client side) or svc_rqst pointer (server side). - * Encode functions always assume there's enough room in the buffer. - */ -typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); - /* * Basic structure for transmission/reception of a client XDR message. * Features a header (for a linear buffer containing RPC headers -- cgit v1.2.3 From 0becc1181cdba562730be4d4b8a5fcb4368ef527 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2017 23:40:27 +0200 Subject: sunrpc: move pc_count out of struct svc_procinfo pc_count is the only writeable memeber of struct svc_procinfo, which is a good candidate to be const-ified as it contains function pointers. This patch moves it into out out struct svc_procinfo, and into a separate writable array that is pointed to by struct svc_version. Signed-off-by: Christoph Hellwig --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index bd9e313c444a..bcd114f038ef 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -398,6 +398,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ struct svc_procedure * vs_proc; /* per-procedure info */ + unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ @@ -430,7 +431,6 @@ struct svc_procedure { void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ unsigned int pc_ressize; /* result struct size */ - unsigned int pc_count; /* call count */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; -- cgit v1.2.3 From b9c744c19c441f306239ac3e60a2a95b40d698f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:11:49 +0200 Subject: sunrpc: mark all struct svc_procinfo instances as const struct svc_procinfo contains function pointers, and marking it as constant avoids it being able to be used as an attach vector for code injections. Signed-off-by: Christoph Hellwig --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 41f7b6a04d69..3eca67728366 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -192,9 +192,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[]; #ifdef CONFIG_LOCKD_V4 -extern struct svc_procedure nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index bcd114f038ef..992ea3419795 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -237,7 +237,7 @@ struct svc_rqst { struct svc_serv * rq_server; /* RPC service definition */ struct svc_pool * rq_pool; /* thread pool */ - struct svc_procedure * rq_procinfo; /* procedure info */ + const struct svc_procedure *rq_procinfo;/* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ @@ -397,7 +397,7 @@ struct svc_program { struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ - struct svc_procedure * vs_proc; /* per-procedure info */ + const struct svc_procedure *vs_proc; /* per-procedure info */ unsigned int *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ -- cgit v1.2.3 From aa8217d5dcb1db594d816794ef6ab434ebf3e127 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 May 2017 16:21:37 +0200 Subject: sunrpc: mark all struct svc_version instances as const Signed-off-by: Christoph Hellwig Acked-by: Trond Myklebust --- include/linux/sunrpc/svc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 992ea3419795..eec04982a7ea 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -384,7 +384,7 @@ struct svc_program { unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ unsigned int pg_nvers; /* number of versions */ - struct svc_version ** pg_vers; /* version array */ + const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ -- cgit v1.2.3 From a7a3b1e971cd806b81ecea3a234d8dae9de0add0 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 20 Jun 2017 08:33:44 -0400 Subject: NFS: convert flags to bool NFS uses some int, and unsigned int :1, and bool as flags in structs and args. Assert the preference for uniformly replacing these with the bool type. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b28c83475ee8..9b42bffbe07b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -878,7 +878,7 @@ struct nfs3_readdirargs { struct nfs_fh * fh; __u64 cookie; __be32 verf[2]; - int plus; + bool plus; unsigned int count; struct page ** pages; }; @@ -909,7 +909,7 @@ struct nfs3_linkres { struct nfs3_readdirres { struct nfs_fattr * dir_attr; __be32 * verf; - int plus; + bool plus; }; struct nfs3_getaclres { @@ -1053,7 +1053,7 @@ struct nfs4_readdir_arg { struct page ** pages; /* zero-copy data */ unsigned int pgbase; /* zero-copy data */ const u32 * bitmask; - int plus; + bool plus; }; struct nfs4_readdir_res { @@ -1585,7 +1585,7 @@ struct nfs_rpc_ops { int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, - u64, struct page **, unsigned int, int); + u64, struct page **, unsigned int, bool); int (*mknod) (struct inode *, struct dentry *, struct iattr *, dev_t); int (*statfs) (struct nfs_server *, struct nfs_fh *, @@ -1595,7 +1595,7 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); - int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); + int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, bool); int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_header *); void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *); -- cgit v1.2.3 From 818a8dbe83fddff534b814a7d4e0c75b511dff2e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 16 Jun 2017 11:13:00 -0400 Subject: NFS: nfs_rename() - revalidate directories on -ERESTARTSYS An interrupted rename will leave the old dentry behind if the rename succeeds. Fix this by forcing a lookup the next time through ->d_revalidate. A previous attempt at solving this problem took the approach to complete the work of the rename asynchronously, however that approach was wrong since it would allow the d_move() to occur after the directory's i_mutex had been dropped by the original process. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b42bffbe07b..9463eeff9e3c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1533,6 +1533,7 @@ struct nfs_renamedata { struct nfs_fattr new_fattr; void (*complete)(struct rpc_task *, struct nfs_renamedata *); long timeout; + bool cancelled; }; struct nfs_access_entry; -- cgit v1.2.3 From b5973a8c1ccf375c9ab9e2428e1185e3f799af06 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Jun 2017 19:35:36 -0400 Subject: NFS: Remove unused fields in the page I/O structures Remove the 'layout_private' fields that were only used by the pNFS OSD layout driver. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 - include/linux/nfs_xdr.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 247cc3d3498f..6138cf91346b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -94,7 +94,6 @@ struct nfs_pageio_descriptor { const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; - void *pg_layout_private; unsigned int pg_bsize; /* default bsize for mirrors */ u32 pg_mirror_count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9463eeff9e3c..7f1e04941763 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1436,7 +1436,6 @@ struct nfs_pgio_header { const struct nfs_pgio_completion_ops *completion_ops; const struct nfs_rw_ops *rw_ops; struct nfs_direct_req *dreq; - void *layout_private; spinlock_t lock; /* fields protected by lock */ int pnfs_error; -- cgit v1.2.3 From 919e3bd9a87593520a2c5dfda27bd3e6599852ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Jun 2017 19:35:37 -0400 Subject: NFS: Ensure we commit after writeback is complete If the page cache is being flushed, then we want to ensure that we do start a commit once the pages are done being flushed. If we just wait until all I/O is done to that file, we can end up livelocking until the balance_dirty_pages() mechanism puts its foot down and forces I/O to stop. So instead we do more or less the same thing that O_DIRECT does, and set up a counter to tell us when the flush is done, Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 + include/linux/nfs_xdr.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6138cf91346b..abbee2d15dce 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -93,6 +93,7 @@ struct nfs_pageio_descriptor { const struct rpc_call_ops *pg_rpc_callops; const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; + struct nfs_io_completion *pg_io_completion; struct nfs_direct_req *pg_dreq; unsigned int pg_bsize; /* default bsize for mirrors */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7f1e04941763..89093341f076 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1422,6 +1422,7 @@ enum { NFS_IOHDR_STAT, }; +struct nfs_io_completion; struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; @@ -1435,6 +1436,7 @@ struct nfs_pgio_header { void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; const struct nfs_rw_ops *rw_ops; + struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; spinlock_t lock; /* fields protected by lock */ -- cgit v1.2.3 From 8dcbec6d20eb881ba368d0aebc3a8a678aebb1da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 8 Jun 2017 11:52:44 -0400 Subject: NFSv4.1: Handle EXCHGID4_FLAG_CONFIRMED_R during NFSv4.1 migration Transparent State Migration copies a client's lease state from the server where a filesystem used to reside to the server where it now resides. When an NFSv4.1 client first contacts that destination server, it uses EXCHANGE_ID to detect trunking relationships. The lease that was copied there is returned to that client, but the destination server sets EXCHGID4_FLAG_CONFIRMED_R when replying to the client. This is because the lease was confirmed on the source server (before it was copied). Normally, when CONFIRMED_R is set, a client purges the lease and creates a new one. However, that throws away the entire benefit of Transparent State Migration. Therefore, the client must not purge that lease when it is possible that Transparent State Migration has occurred. Reported-by: Xuan Qi Signed-off-by: Chuck Lever Tested-by: Xuan Qi Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index e418a1096662..74c44665e6d3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -42,6 +42,7 @@ struct nfs_client { #define NFS_CS_MIGRATION 2 /* - transparent state migr */ #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ +#define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -210,6 +211,7 @@ struct nfs_server { unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) #define NFS_MIG_FAILED (2) +#define NFS_MIG_TSM_POSSIBLE (3) void (*destroy)(struct nfs_server *); -- cgit v1.2.3 From f174ff7a0ab6a097455a94abfc99517940041c07 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 Jun 2017 06:34:51 -0700 Subject: nfs: add a nfs_ilookup helper This helper will allow to find an existing NFS inode by the file handle and fattr. Signed-off-by: Peng Tao [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bb0eb2c9acca..e52cc55ac300 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -332,6 +332,7 @@ extern void nfs_zap_caches(struct inode *); extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); +struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); -- cgit v1.2.3 From 5b5faaf6df73412af0278997db36dbcb51011d9d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 29 Jun 2017 06:34:52 -0700 Subject: nfs4: add NFSv4 LOOKUPP handlers This will be needed in order to implement the get_parent export op for nfsd. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 1b1ca04820a3..47239c336688 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -479,6 +479,7 @@ enum { NFSPROC4_CLNT_ACCESS, NFSPROC4_CLNT_GETATTR, NFSPROC4_CLNT_LOOKUP, + NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LOOKUP_ROOT, NFSPROC4_CLNT_REMOVE, NFSPROC4_CLNT_RENAME, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 89093341f076..ca3bcc4ed4e5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1012,7 +1012,6 @@ struct nfs4_link_res { struct nfs_fattr * dir_attr; }; - struct nfs4_lookup_arg { struct nfs4_sequence_args seq_args; const struct nfs_fh * dir_fh; @@ -1028,6 +1027,20 @@ struct nfs4_lookup_res { struct nfs4_label *label; }; +struct nfs4_lookupp_arg { + struct nfs4_sequence_args seq_args; + const struct nfs_fh *fh; + const u32 *bitmask; +}; + +struct nfs4_lookupp_res { + struct nfs4_sequence_res seq_res; + const struct nfs_server *server; + struct nfs_fattr *fattr; + struct nfs_fh *fh; + struct nfs4_label *label; +}; + struct nfs4_lookup_root_arg { struct nfs4_sequence_args seq_args; const u32 * bitmask; @@ -1569,6 +1582,8 @@ struct nfs_rpc_ops { int (*lookup) (struct inode *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); + int (*lookupp) (struct inode *, struct nfs_fh *, + struct nfs_fattr *, struct nfs4_label *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); -- cgit v1.2.3 From b4f937cffa66b3d56eb8f586e620d0b223a281a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:53:48 -0400 Subject: NFS: Don't run wake_up_bit() when nobody is waiting... "perf lock" shows fairly heavy contention for the bit waitqueue locks when doing an I/O heavy workload. Use a bit to tell whether or not there has been contention for a lock so that we can optimise away the bit waitqueue options in those cases. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index abbee2d15dce..d67b67ae6c8b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,6 +33,8 @@ enum { PG_UPTODATE, /* page group sync bit in read path */ PG_WB_END, /* page group sync bit in write path */ PG_REMOVE, /* page group sync bit in write path */ + PG_CONTENDED1, /* Is someone waiting for a lock? */ + PG_CONTENDED2, /* Is someone waiting for a lock? */ }; struct nfs_inode; -- cgit v1.2.3 From d3457c877b14aaee8c52923eedf05a3b78af0476 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 14 Jul 2017 17:13:20 +0300 Subject: kvm: x86: hyperv: make VP_INDEX managed by userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hyper-V identifies vCPUs by Virtual Processor Index, which can be queried via HV_X64_MSR_VP_INDEX msr. It is defined by the spec as a sequential number which can't exceed the maximum number of vCPUs per VM. APIC ids can be sparse and thus aren't a valid replacement for VP indices. Current KVM uses its internal vcpu index as VP_INDEX. However, to make it predictable and persistent across VM migrations, the userspace has to control the value of VP_INDEX. This patch achieves that, by storing vp_index explicitly on vcpu, and allowing HV_X64_MSR_VP_INDEX to be set from the host side. For compatibility it's initialized to KVM vcpu index. Also a few variables are renamed to make clear distinction betweed this Hyper-V vp_index and KVM vcpu_id (== APIC id). Besides, a new capability, KVM_CAP_HYPERV_VP_INDEX, is added to allow the userspace to skip attempting msr writes where unsupported, to avoid spamming error logs. Signed-off-by: Roman Kagan Signed-off-by: Radim Krčmář --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 38b2cfbc8112..6cd63c18708a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -928,6 +928,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_FWNMI 146 #define KVM_CAP_PPC_SMT_POSSIBLE 147 #define KVM_CAP_HYPERV_SYNIC2 148 +#define KVM_CAP_HYPERV_VP_INDEX 149 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 9049f2f6e7bdfb5de0c63c2635bf3cdb70c4efb5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 14 Jul 2017 14:49:52 -0700 Subject: fault-inject: parse as natural 1-based value for fail-nth write interface The value written to fail-nth file is parsed as 0-based. Parsing as one-based is more natural to understand and it enables to cancel the previous setup by simply writing '0'. This change also converts task->fail_nth from signed to unsigned int. Link: http://lkml.kernel.org/r/1491490561-10485-3-git-send-email-akinobu.mita@gmail.com Signed-off-by: Akinobu Mita Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3822d749fc9e..2ba9ec93423f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -974,7 +974,7 @@ struct task_struct { #ifdef CONFIG_FAULT_INJECTION int make_it_fail; - int fail_nth; + unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call -- cgit v1.2.3 From 077d2ba519b2e8bf1abd80cbade699b1de42cafe Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 14 Jul 2017 17:28:12 -0400 Subject: replace incorrect strscpy use in FORTIFY_SOURCE Using strscpy was wrong because FORTIFY_SOURCE is passing the maximum possible size of the outermost object, but strscpy defines the count parameter as the exact buffer size, so this could copy past the end of the source. This would still be wrong with the planned usage of __builtin_object_size(p, 1) for intra-object overflow checks since it's the maximum possible size of the specified object with no guarantee of it being that large. Reuse of the fortified functions like this currently makes the runtime error reporting less precise but that can be improved later on. Noticed by Dave Jones and KASAN. Signed-off-by: Daniel Micay Acked-by: Kees Cook Reported-by: Dave Jones Signed-off-by: Linus Torvalds --- include/linux/string.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 96f5a5fd0377..049866760e8b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -202,17 +202,6 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) -__FORTIFY_INLINE char *strcpy(char *p, const char *q) -{ - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); - if (p_size == (size_t)-1 && q_size == (size_t)-1) - return __builtin_strcpy(p, q); - if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0) - fortify_panic(__func__); - return p; -} - __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { size_t p_size = __builtin_object_size(p, 0); @@ -391,6 +380,18 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) fortify_panic(__func__); return __real_kmemdup(p, size, gfp); } + +/* defined after fortified strlen and memcpy to reuse them */ +__FORTIFY_INLINE char *strcpy(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + memcpy(p, q, strlen(q) + 1); + return p; +} + #endif #endif /* _LINUX_STRING_H_ */ -- cgit v1.2.3