From e9e575b8f29445bcde67f421891efa4d6527d987 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Sep 2018 19:10:39 -0700 Subject: gss_krb5: Remove VLA usage of skcipher In the quest to remove all stack VLA usage from the kernel[1], this replaces struct crypto_skcipher and SKCIPHER_REQUEST_ON_STACK() usage with struct crypto_sync_skcipher and SYNC_SKCIPHER_REQUEST_ON_STACK(), which uses a fixed stack size. [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Cc: Trond Myklebust Cc: Anna Schumaker Cc: "J. Bruce Fields" Cc: Jeff Layton Cc: YueHaibing Cc: linux-nfs@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Herbert Xu --- include/linux/sunrpc/gss_krb5.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7df625d41e35..f6e8ceafafd8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -71,10 +71,10 @@ struct gss_krb5_enctype { const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_skcipher *tfm, + u32 (*encrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* encryption function */ - u32 (*decrypt) (struct crypto_skcipher *tfm, + u32 (*decrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* decryption function */ u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, @@ -98,12 +98,12 @@ struct krb5_ctx { u32 enctype; u32 flags; const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_skcipher *enc; - struct crypto_skcipher *seq; - struct crypto_skcipher *acceptor_enc; - struct crypto_skcipher *initiator_enc; - struct crypto_skcipher *acceptor_enc_aux; - struct crypto_skcipher *initiator_enc_aux; + struct crypto_sync_skcipher *enc; + struct crypto_sync_skcipher *seq; + struct crypto_sync_skcipher *acceptor_enc; + struct crypto_sync_skcipher *initiator_enc; + struct crypto_sync_skcipher *acceptor_enc_aux; + struct crypto_sync_skcipher *initiator_enc_aux; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; s32 endtime; @@ -262,24 +262,24 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, u32 -krb5_encrypt(struct crypto_skcipher *key, +krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); u32 -krb5_decrypt(struct crypto_skcipher *key, +krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); int -gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *outbuf, +gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, int offset, struct page **pages); int -gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *inbuf, +gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf, int offset); s32 krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_skcipher *key, + struct crypto_sync_skcipher *key, int direction, u32 seqnum, unsigned char *cksum, unsigned char *buf); @@ -320,12 +320,12 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, int krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, - struct crypto_skcipher *cipher, + struct crypto_sync_skcipher *cipher, unsigned char *cksum); int krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, - struct crypto_skcipher *cipher, + struct crypto_sync_skcipher *cipher, s32 seqnum); void gss_krb5_make_confounder(char *p, u32 conflen); -- cgit v1.2.3 From 9dc6edcf676fe188430e8b119f91280bbf285163 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Aug 2018 14:24:16 -0400 Subject: SUNRPC: Clean up initialisation of the struct rpc_rqst Move the initialisation back into xprt.c. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 336fd1a19cca..3d80524e92d6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -325,7 +325,6 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); -void xprt_request_init(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From 3021a5bbbf0aa0252f2993b84ee903a0eca0b690 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Aug 2018 13:50:21 -0400 Subject: SUNRPC: The transmitted message must lie in the RPCSEC window of validity If a message has been encoded using RPCSEC_GSS, the server is maintaining a window of sequence numbers that it considers valid. The client should normally be tracking that window, and needs to verify that the sequence number used by the message being transmitted still lies inside the window of validity. So far, we've been able to assume this condition would be realised automatically, since the client has been encoding the message only after taking the socket lock. Once we change that condition, we will need the explicit check. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 ++ include/linux/sunrpc/auth_gss.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 58a6765c1c5e..2c97a3933ef9 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -157,6 +157,7 @@ struct rpc_credops { int (*crkey_timeout)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *); char * (*crstringify_acceptor)(struct rpc_cred *); + bool (*crneed_reencode)(struct rpc_task *); }; extern const struct rpc_authops authunix_ops; @@ -192,6 +193,7 @@ __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); __be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 0c9eac351aab..30427b729070 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -70,6 +70,7 @@ struct gss_cl_ctx { refcount_t count; enum rpc_gss_proc gc_proc; u32 gc_seq; + u32 gc_seq_xmit; spinlock_t gc_seq_lock; struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; -- cgit v1.2.3 From 7ebbbc6e7bd023903daa5bd95726edf2d60b559c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Aug 2018 09:00:27 -0400 Subject: SUNRPC: Simplify identification of when the message send/receive is complete Add states to indicate that the message send and receive are not yet complete. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 592653becd91..9e655df70131 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -140,8 +140,10 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 #define RPC_TASK_ACTIVE 2 -#define RPC_TASK_MSG_RECV 3 -#define RPC_TASK_MSG_RECV_WAIT 4 +#define RPC_TASK_NEED_XMIT 3 +#define RPC_TASK_NEED_RECV 4 +#define RPC_TASK_MSG_RECV 5 +#define RPC_TASK_MSG_RECV_WAIT 6 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -- cgit v1.2.3 From d1109aa56c71e19fc117e75bff11384fc7279a3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2018 15:48:42 -0400 Subject: SUNRPC: Rename TCP receive-specific state variables Since we will want to introduce similar TCP state variables for the transmission of requests, let's rename the existing ones to label that they are for the receive side. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index ae0f99b9b965..90d5ca8e65f4 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -30,15 +30,17 @@ struct sock_xprt { /* * State of TCP reply receive */ - __be32 tcp_fraghdr, - tcp_xid, - tcp_calldir; + struct { + __be32 fraghdr, + xid, + calldir; - u32 tcp_offset, - tcp_reclen; + u32 offset, + len; - unsigned long tcp_copied, - tcp_flags; + unsigned long copied, + flags; + } recv; /* * Connection of transports -- cgit v1.2.3 From 6c7a64e5a44dbc6d073b83a56a48d0a4099f1dd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2018 16:54:57 -0400 Subject: SUNRPC: Add socket transmit queue offset tracking Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 90d5ca8e65f4..005cfb6e7238 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -42,6 +42,13 @@ struct sock_xprt { flags; } recv; + /* + * State of TCP transmit queue + */ + struct { + u32 offset; + } xmit; + /* * Connection of transports */ -- cgit v1.2.3 From cf9946cd6144410ced00d52586ff5a2cb4868fc5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Aug 2018 12:55:34 -0400 Subject: SUNRPC: Refactor the transport request pinning We are going to need to pin for both send and receive. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- include/linux/sunrpc/xprt.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 9e655df70131..8062ce6b18e5 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -142,8 +142,7 @@ struct rpc_task_setup { #define RPC_TASK_ACTIVE 2 #define RPC_TASK_NEED_XMIT 3 #define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_RECV 5 -#define RPC_TASK_MSG_RECV_WAIT 6 +#define RPC_TASK_MSG_PIN_WAIT 5 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3d80524e92d6..bd743c51a865 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -103,6 +103,7 @@ struct rpc_rqst { /* A cookie used to track the state of the transport connection */ + atomic_t rq_pin; /* * Partial send handling -- cgit v1.2.3 From 359c48c04af25397ecefec1ccf200ddd199617ce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 29 Aug 2018 09:22:28 -0400 Subject: SUNRPC: Add a helper to wake up a sleeping rpc_task and set its status Add a helper that will wake up a task that is sleeping on a specific queue, and will set the value of task->tk_status. This is mainly intended for use by the transport layer to notify the task of an error condition. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8062ce6b18e5..8840a420cf4c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -235,6 +235,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq, struct rpc_task *task); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); +void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *, + struct rpc_task *, + int); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, -- cgit v1.2.3 From 75c84151a9dc7a755c607e6761d8f14a1690dbf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Aug 2018 10:21:00 -0400 Subject: SUNRPC: Rename xprt->recv_lock to xprt->queue_lock We will use the same lock to protect both the transmit and receive queues. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bd743c51a865..c25d0a5fda69 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -235,7 +235,7 @@ struct rpc_xprt { */ spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ - spinlock_t recv_lock; /* lock receive list */ + spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ -- cgit v1.2.3 From edc81dcd5b7f699c4049042b35c904396642032e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Aug 2018 17:55:46 -0400 Subject: SUNRPC: Refactor xprt_transmit() to remove the reply queue code Separate out the action of adding a request to the reply queue so that the backchannel code can simply skip calling it altogether. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c25d0a5fda69..0250294c904a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -334,6 +334,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -- cgit v1.2.3 From 7f3a1d1e1806a0eb9b200e3aed2a04431f2bcc6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 23 Aug 2018 00:03:43 -0400 Subject: SUNRPC: Refactor xprt_transmit() to remove wait for reply code Allow the caller in clnt.c to call into the code to wait for a reply after calling xprt_transmit(). Again, the reason is that the backchannel code does not need this functionality. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0250294c904a..4fa2af087cff 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -335,6 +335,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); +void xprt_request_wait_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -- cgit v1.2.3 From ef3f54347f690d06649c0d7a1f63d3410b3d08d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Aug 2018 09:23:32 -0400 Subject: SUNRPC: Distinguish between the slot allocation list and receive queue When storing a struct rpc_rqst on the slot allocation list, we currently use the same field 'rq_list' as we use to store the request on the receive queue. Since the structure is never on both lists at the same time, this is OK. However, for clarity, let's make that a union with different names for the different lists so that we can more easily distinguish between the two states. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 4fa2af087cff..9cec2d0811f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -82,7 +82,11 @@ struct rpc_rqst { struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ - struct list_head rq_list; + + union { + struct list_head rq_list; /* Slot allocation list */ + struct list_head rq_recv; /* Receive queue */ + }; void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; @@ -249,7 +253,8 @@ struct rpc_xprt { struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv; + + struct list_head recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ -- cgit v1.2.3 From 944b042921a17d1a4e51bb05f8edf2b93d26e36f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 9 Aug 2018 23:33:21 -0400 Subject: SUNRPC: Add a transmission queue for RPC requests Add the queue that will enforce the ordering of RPC task transmission. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9cec2d0811f2..81a6c2c8dfc7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -88,6 +88,8 @@ struct rpc_rqst { struct list_head rq_recv; /* Receive queue */ }; + struct list_head rq_xmit; /* Send queue */ + void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ @@ -242,6 +244,9 @@ struct rpc_xprt { spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + + struct list_head xmit_queue; /* Send queue */ + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ @@ -339,6 +344,7 @@ void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); -- cgit v1.2.3 From 762e4e67b356ab7b8fbfc39bc07dc6110121505e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 24 Aug 2018 16:28:28 -0400 Subject: SUNRPC: Refactor RPC call encoding Move the call encoding so that it occurs before the transport connection etc. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81a6c2c8dfc7..b8a7de161f67 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -347,6 +347,7 @@ bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); void xprt_request_wait_receive(struct rpc_task *task); +bool xprt_request_need_retransmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -- cgit v1.2.3 From 902c58872e1e9a2c146a55b0701c0b26cc5a4b24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Sep 2018 17:21:01 -0400 Subject: SUNRPC: Fix up the back channel transmit Fix up the back channel code to recognise that it has already been transmitted, so does not need to be called again. Also ensure that we set req->rq_task. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 4397a4824c81..28721cf73ec3 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -- cgit v1.2.3 From 50f484e298218b7271fad8a23bd44c82fb3110e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 30 Aug 2018 13:27:29 -0400 Subject: SUNRPC: Treat the task and request as separate in the xprt_ops->send_request() When we shift to using the transmit queue, then the task that holds the write lock will not necessarily be the same as the one being transmitted. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b8a7de161f67..8c2bb078f00c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -140,7 +140,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*send_request)(struct rpc_task *task); + int (*send_request)(struct rpc_rqst *req, struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); -- cgit v1.2.3 From 918f3c1fe83c5baa4892b943d3f5ac7191d8fb74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Sep 2018 11:37:22 -0400 Subject: SUNRPC: Improve latency for interactive tasks One of the intentions with the priority queues was to ensure that no single process can hog the transport. The field task->tk_owner therefore identifies the RPC call's origin, and is intended to allow the RPC layer to organise queues for fairness. This commit therefore modifies the transmit queue to group requests by task->tk_owner, and ensures that we round robin among those groups. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8c2bb078f00c..e377620b9744 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -89,6 +89,7 @@ struct rpc_rqst { }; struct list_head rq_xmit; /* Send queue */ + struct list_head rq_xmit2; /* Send queue */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; -- cgit v1.2.3 From 75891f502f5fc70f52a01af5b924384ed4866907 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 17:37:36 -0400 Subject: SUNRPC: Support for congestion control when queuing is enabled Both RDMA and UDP transports require the request to get a "congestion control" credit before they can be transmitted. Right now, this is done when the request locks the socket. We'd like it to happen when a request attempts to be transmitted for the first time. In order to support retransmission of requests that already hold such credits, we also want to ensure that they get queued first, so that we don't deadlock with requests that have yet to obtain a credit. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e377620b9744..0d0cc127615e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -397,6 +397,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_pin_rqst(struct rpc_rqst *req); void xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); +bool xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); @@ -415,6 +416,7 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONGESTED (9) +#define XPRT_CWND_WAIT (10) static inline void xprt_set_connected(struct rpc_xprt *xprt) { -- cgit v1.2.3 From 36bd7de949f41d586ef7794169af75462b67acbc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 18:41:32 -0400 Subject: SUNRPC: Turn off throttling of RPC slots for TCP sockets The theory was that we would need to grab the socket lock anyway, so we might as well use it to gate the allocation of RPC slots for a TCP socket. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0d0cc127615e..14c9b4d49fb4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -343,7 +343,6 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); -void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); -- cgit v1.2.3 From c544577daddb618c7dd5fa7fb98d6a41782f020e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 23:39:27 -0400 Subject: SUNRPC: Clean up transport write space handling Treat socket write space handling in the same way we now treat transport congestion: by denying the XPRT_LOCK until the transport signals that it has free buffer space. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 1 - include/linux/sunrpc/xprt.h | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index c3d72066d4b1..6b7a86c4d6e6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -84,7 +84,6 @@ struct svc_xprt { struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; - struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 14c9b4d49fb4..5600242ccbf9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -387,8 +387,8 @@ int xprt_load_transport(const char *); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); -void xprt_write_space(struct rpc_xprt *xprt); +void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); +bool xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); void xprt_update_rtt(struct rpc_task *task); @@ -416,6 +416,7 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_CLOSING (6) #define XPRT_CONGESTED (9) #define XPRT_CWND_WAIT (10) +#define XPRT_WRITE_SPACE (11) static inline void xprt_set_connected(struct rpc_xprt *xprt) { -- cgit v1.2.3 From adfa71446dd0943ba376eff3e05c7c89582f8038 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 23:58:59 -0400 Subject: SUNRPC: Cleanup: remove the unused 'task' argument from the request_send() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5600242ccbf9..823860cce0bc 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); - int (*send_request)(struct rpc_rqst *req, struct rpc_task *task); + int (*send_request)(struct rpc_rqst *req); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); -- cgit v1.2.3 From 95f7691daa57bbd68caac2bdad79e0b08f4d46c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Sep 2018 08:35:22 -0400 Subject: SUNRPC: Convert xprt receive queue to use an rbtree If the server is slow, we can find ourselves with quite a lot of entries on the receive queue. Converting the search from an O(n) to O(log(n)) can make a significant difference, particularly since we have to hold a number of locks while searching. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 823860cce0bc..9be399020dab 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -85,7 +85,7 @@ struct rpc_rqst { union { struct list_head rq_list; /* Slot allocation list */ - struct list_head rq_recv; /* Receive queue */ + struct rb_node rq_recv; /* Receive queue */ }; struct list_head rq_xmit; /* Send queue */ @@ -260,7 +260,7 @@ struct rpc_xprt { * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv_queue; /* Receive queue */ + struct rb_root recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ -- cgit v1.2.3 From f42f7c283078ce3c1e8368b140e270755b1ae313 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 8 Sep 2018 22:09:48 -0400 Subject: SUNRPC: Fix priority queue fairness Fix up the priority queue to not batch by owner, but by queue, so that we allow '1 << priority' elements to be dequeued before switching to the next priority queue. The owner field is still used to wake up requests in round robin order by owner to avoid single processes hogging the RPC layer by loading the queues. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 8840a420cf4c..7b540c066594 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -189,7 +189,6 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ @@ -205,7 +204,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. */ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* -- cgit v1.2.3 From 431f6eb3570f286036bc8718a908a283f5d99473 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Sep 2018 00:08:20 -0400 Subject: SUNRPC: Add a label for RPC calls that require allocation on receive If the RPC call relies on the receive call allocating pages as buffers, then let's label it so that we a) Don't leak memory by allocating pages for requests that do not expect this behaviour b) Can optimise for the common case where calls do not require allocation. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bd68177a442..431829233392 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -58,6 +58,7 @@ struct xdr_buf { flags; /* Flags for data disposition */ #define XDRBUF_READ 0x01 /* target of file read */ #define XDRBUF_WRITE 0x02 /* source of file write */ +#define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ -- cgit v1.2.3 From 9d96acbc7f376dc1ffcedca0c349dd3389187a38 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Sep 2018 12:22:04 -0400 Subject: SUNRPC: Add a bvec array to struct xdr_buf for use with iovec_iter() Add a bvec array to struct xdr_buf, and have the client allocate it when we need to receive data into pages. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 7 +++++++ include/linux/sunrpc/xprt.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 431829233392..745587132a87 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -18,6 +18,7 @@ #include #include +struct bio_vec; struct rpc_rqst; /* @@ -52,6 +53,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ + struct bio_vec *bvec; struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ @@ -70,6 +72,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_base = start; buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; + buf->bvec = NULL; + buf->pages = NULL; buf->page_len = 0; buf->flags = 0; buf->len = 0; @@ -116,6 +120,9 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); void xdr_terminate_string(struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(struct xdr_buf *buf); +int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); +void xdr_free_bvec(struct xdr_buf *buf); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9be399020dab..a4ab4f8d9140 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,6 +141,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(struct rpc_task *task); + void (*prepare_request)(struct rpc_rqst *req); int (*send_request)(struct rpc_rqst *req); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -343,6 +344,7 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req); +void xprt_request_prepare(struct rpc_rqst *req); bool xprt_prepare_transmit(struct rpc_task *task); void xprt_request_enqueue_transmit(struct rpc_task *task); void xprt_request_enqueue_receive(struct rpc_task *task); -- cgit v1.2.3 From 277e4ab7d530bf287e02b65cfcd3ea8f489784f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 09:49:06 -0400 Subject: SUNRPC: Simplify TCP receive code by switching to using iterators Most of this code should also be reusable with other socket types. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 005cfb6e7238..458bfe0137f5 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -31,15 +31,16 @@ struct sock_xprt { * State of TCP reply receive */ struct { - __be32 fraghdr, + struct { + __be32 fraghdr, xid, calldir; + } __attribute__((packed)); u32 offset, len; - unsigned long copied, - flags; + unsigned long copied; } recv; /* @@ -76,21 +77,9 @@ struct sock_xprt { void (*old_error_report)(struct sock *); }; -/* - * TCP receive state flags - */ -#define TCP_RCV_LAST_FRAG (1UL << 0) -#define TCP_RCV_COPY_FRAGHDR (1UL << 1) -#define TCP_RCV_COPY_XID (1UL << 2) -#define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_READ_CALLDIR (1UL << 4) -#define TCP_RCV_COPY_CALLDIR (1UL << 5) - /* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 6) - #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) #define XPRT_SOCK_UPD_TIMEOUT (3) -- cgit v1.2.3 From 550aebfe1c573518c35ae85d6ffbdc2d44c92703 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 14:32:45 -0400 Subject: SUNRPC: Allow AF_LOCAL sockets to use the generic stream receive Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 745587132a87..8815be7cae72 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -185,7 +185,6 @@ struct xdr_skb_reader { typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, struct xdr_skb_reader *, xdr_skb_read_actor); -- cgit v1.2.3 From ec846469ba7bdb81e42c04e4e15d8fbf19e426e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 14 Sep 2018 14:38:05 -0400 Subject: SUNRPC: Unexport xdr_partial_copy_from_skb() It is no longer used outside of net/sunrpc/socklib.c Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8815be7cae72..43106ffa6788 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -186,8 +186,6 @@ struct xdr_skb_reader { typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); -extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, - struct xdr_skb_reader *, xdr_skb_read_actor); extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); -- cgit v1.2.3 From 571ed1fd2390f74e4c1f46994f753fb0d29285e4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Sep 2018 16:00:43 -0400 Subject: SUNRPC: Replace krb5_seq_lock with a lockless scheme Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7df625d41e35..69f749afa617 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -118,7 +118,8 @@ struct krb5_ctx { u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -extern spinlock_t krb5_seq_lock; +extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx); +extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx); /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) -- cgit v1.2.3 From 608a0ab2f54ab0e301ad76a41aad979ea0d02670 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:44 -0400 Subject: SUNRPC: Add lockless lookup of the server's auth domain Avoid taking the global auth_domain_lock in most lookups of the auth domain by adding an RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcauth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 04e404a07882..3e53a6e2ada7 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -82,6 +82,7 @@ struct auth_domain { struct hlist_node hash; char *name; struct auth_ops *flavour; + struct rcu_head rcu_head; }; /* -- cgit v1.2.3 From 07d02a67b7faae56e184f6c35f78de47f06da37f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 12 Oct 2018 13:28:26 -0400 Subject: SUNRPC: Simplify lookup code We no longer need to worry about whether or not the entry is hashed in order to figure out if the contents are valid. We only care whether or not the refcount is non-zero. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 2c97a3933ef9..a7fc8f5a2dad 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -206,11 +206,11 @@ bool rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *); char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline -struct rpc_cred * get_rpccred(struct rpc_cred *cred) +struct rpc_cred *get_rpccred(struct rpc_cred *cred) { - if (cred != NULL) - atomic_inc(&cred->cr_count); - return cred; + if (cred != NULL && atomic_inc_not_zero(&cred->cr_count)) + return cred; + return NULL; } /** @@ -226,9 +226,7 @@ struct rpc_cred * get_rpccred(struct rpc_cred *cred) static inline struct rpc_cred * get_rpccred_rcu(struct rpc_cred *cred) { - if (atomic_inc_not_zero(&cred->cr_count)) - return cred; - return NULL; + return get_rpccred(cred); } #endif /* __KERNEL__ */ -- cgit v1.2.3 From 79b181810285a6b9b7a1aed25c365c9e1782e22a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Oct 2018 10:34:31 -0400 Subject: SUNRPC: Convert auth creds to use refcount_t Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a7fc8f5a2dad..a71d4bd191e7 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -67,7 +67,7 @@ struct rpc_cred { const struct rpc_credops *cr_ops; unsigned long cr_expire; /* when to gc */ unsigned long cr_flags; /* various flags */ - atomic_t cr_count; /* ref count */ + refcount_t cr_count; /* ref count */ kuid_t cr_uid; @@ -208,7 +208,7 @@ char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline struct rpc_cred *get_rpccred(struct rpc_cred *cred) { - if (cred != NULL && atomic_inc_not_zero(&cred->cr_count)) + if (cred != NULL && refcount_inc_not_zero(&cred->cr_count)) return cred; return NULL; } -- cgit v1.2.3 From 331bc71cb1751d78f6807ad8e6162b07c67cdd1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 14 Oct 2018 10:40:29 -0400 Subject: SUNRPC: Convert the auth cred cache to use refcount_t Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a71d4bd191e7..c4db9424b63b 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -100,7 +100,7 @@ struct rpc_auth { * differ from the flavor in * au_ops->au_flavor in gss * case) */ - atomic_t au_count; /* Reference counter */ + refcount_t au_count; /* Reference counter */ struct rpc_cred_cache * au_credcache; /* per-flavor data */ -- cgit v1.2.3 From ae74136b4bb64440a55117e12065b8c282ab6c1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Oct 2018 12:01:22 -0400 Subject: SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock Instead of the reader/writer spinlock, allow cache lookups to use RCU for looking up entries. This is more efficient since modifications can occur while other entries are being looked up. Note that for now, we keep the reader/writer spinlock until all users have been converted to use RCU-safe freeing of their cache entries. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 40d2822f0e2f..cf3e17ee2786 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -167,6 +167,9 @@ extern const struct file_operations cache_file_operations_pipefs; extern const struct file_operations content_file_operations_pipefs; extern const struct file_operations cache_flush_operations_pipefs; +extern struct cache_head * +sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash); extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -186,6 +189,12 @@ static inline struct cache_head *cache_get(struct cache_head *h) return h; } +static inline struct cache_head *cache_get_rcu(struct cache_head *h) +{ + if (kref_get_unless_zero(&h->ref)) + return h; + return NULL; +} static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { @@ -227,6 +236,9 @@ extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); extern void *cache_seq_start(struct seq_file *file, loff_t *pos); extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop(struct seq_file *file, void *p); +extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop_rcu(struct seq_file *file, void *p); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); -- cgit v1.2.3 From d48cf356a13073853f19be6ca5ebbecfc2762ebe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:51 -0400 Subject: SUNRPC: Remove non-RCU protected lookup Clean up the cache code by removing the non-RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index cf3e17ee2786..c3d67e893430 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -171,9 +171,6 @@ extern struct cache_head * sunrpc_cache_lookup_rcu(struct cache_detail *detail, struct cache_head *key, int hash); extern struct cache_head * -sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash); -extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); @@ -233,9 +230,6 @@ extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ -extern void *cache_seq_start(struct seq_file *file, loff_t *pos); -extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); -extern void cache_seq_stop(struct seq_file *file, void *p); extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop_rcu(struct seq_file *file, void *p); -- cgit v1.2.3 From 1863d77f15da0addcd293a1719fa5d3ef8cde3ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:52 -0400 Subject: SUNRPC: Replace the cache_detail->hash_lock with a regular spinlock Now that the reader functions are all RCU protected, use a regular spinlock rather than a reader/writer lock. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c3d67e893430..5a3e95017fc6 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -67,7 +67,7 @@ struct cache_detail { struct module * owner; int hash_size; struct hlist_head * hash_table; - rwlock_t hash_lock; + spinlock_t hash_lock; char *name; void (*cache_put)(struct kref *); -- cgit v1.2.3 From 3ae2cefb613b00d613677c05ffa384b4f660f468 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:16:11 -0400 Subject: svcrdma: Increase the default connection credit limit Reduce queuing on clients by allowing more credits by default. 64 is the default NFSv4.1 slot table size on Linux clients. This size prevents the credit limit from putting RPC requests to sleep again after they have already slept waiting for a session slot. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index fd78f78df5c6..e6e26918504c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -113,13 +113,14 @@ struct svcxprt_rdma { /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_LISTEN_BACKLOG 10 -#define RPCRDMA_MAX_REQUESTS 32 - -/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our - * current NFSv4.1 implementation supports one backchannel slot. +/* + * Default connection parameters */ -#define RPCRDMA_MAX_BC_REQUESTS 2 +enum { + RPCRDMA_LISTEN_BACKLOG = 10, + RPCRDMA_MAX_REQUESTS = 64, + RPCRDMA_MAX_BC_REQUESTS = 2, +}; #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD -- cgit v1.2.3 From c3be6577d82a9f0163eb1e2c37a477414d12a209 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 1 Nov 2018 17:51:34 +0000 Subject: SUNRPC: Use atomic(64)_t for seq_send(64) The seq_send & seq_send64 fields in struct krb5_ctx are used as atomically incrementing counters. This is implemented using cmpxchg() & cmpxchg64() to implement what amount to custom versions of atomic_fetch_inc() & atomic64_fetch_inc(). Besides the duplication, using cmpxchg64() has another major drawback in that some 32 bit architectures don't provide it. As such commit 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme") resulted in build failures for some architectures. Change seq_send to be an atomic_t and seq_send64 to be an atomic64_t, then use atomic(64)_* functions to manipulate the values. The atomic64_t type & associated functions are provided even on architectures which lack real 64 bit atomic memory access via CONFIG_GENERIC_ATOMIC64 which uses spinlocks to serialize access. This fixes the build failures for architectures lacking cmpxchg64(). A potential alternative that was raised would be to provide cmpxchg64() on the 32 bit architectures that currently lack it, using spinlocks. However this would provide a version of cmpxchg64() with semantics a little different to the implementations on architectures with real 64 bit atomics - the spinlock-based implementation would only work if all access to the memory used with cmpxchg64() is *always* performed using cmpxchg64(). That is not currently a requirement for users of cmpxchg64(), and making it one seems questionable. As such avoiding cmpxchg64() outside of architecture-specific code seems best, particularly in cases where atomic64_t seems like a better fit anyway. The CONFIG_GENERIC_ATOMIC64 implementation of atomic64_* functions will use spinlocks & so faces the same issue, but with the key difference that the memory backing an atomic64_t ought to always be accessed via the atomic64_* functions anyway making the issue moot. Signed-off-by: Paul Burton Fixes: 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme") Cc: Trond Myklebust Cc: Anna Schumaker Cc: J. Bruce Fields Cc: Jeff Layton Cc: David S. Miller Cc: linux-nfs@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 69f749afa617..4162de72e95c 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -107,8 +107,8 @@ struct krb5_ctx { u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; s32 endtime; - u32 seq_send; - u64 seq_send64; + atomic_t seq_send; + atomic64_t seq_send64; struct xdr_netobj mech_used; u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; @@ -118,9 +118,6 @@ struct krb5_ctx { u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx); -extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx); - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) -- cgit v1.2.3