From 094bb20b9fcab3a1652a77741caba6b78097d622 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:20 -0400 Subject: [PATCH] RPC: extract socket logic common to both client and server Clean-up: Move some code that is common to both RPC client- and server-side socket transports into its own source file, net/sunrpc/socklib.c. Test-plan: Compile kernel with CONFIG_NFS enabled. Millions of fsx operations over UDP, client and server. Connectathon over UDP. Version: Thu, 11 Aug 2005 16:03:09 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 23448d0fb5bc..d8b7656bca41 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -161,6 +161,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); +extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); -- cgit v1.2.3 From a246b0105bbd9a70a698f69baae2042996f2a0e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:23 -0400 Subject: [PATCH] RPC: introduce client-side transport switch Move the bulk of client-side socket-specific code into a separate source file, net/sunrpc/xprtsock.c. Test-plan: Millions of fsx operations. Performance characterization such as "sio" or "iozone". Destructive testing (unplugging the network temporarily, server reboots). Connectathon with v2, v3, and v4. Version: Thu, 11 Aug 2005 16:03:38 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 ----- include/linux/sunrpc/xprt.h | 38 +++++++++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d8b7656bca41..5da968729cf8 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -165,11 +165,6 @@ extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); -struct socket; -struct sockaddr; -extern int xdr_sendpages(struct socket *, struct sockaddr *, int, - struct xdr_buf *, unsigned int, int); - extern int xdr_encode_word(struct xdr_buf *, int, u32); extern int xdr_decode_word(struct xdr_buf *, int, u32 *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e618c1649814..d82b47ab73cb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -59,7 +59,13 @@ extern unsigned int xprt_tcp_slot_table_entries; */ #define RPC_REESTABLISH_TIMEOUT (15*HZ) -/* RPC call and reply header size as number of 32bit words (verifier +/* + * RPC transport idle timeout. + */ +#define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) + +/* + * RPC call and reply header size as number of 32bit words (verifier * size computed separately) */ #define RPC_CALLHDRSIZE 6 @@ -121,12 +127,19 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len -#define XPRT_LAST_FRAG (1 << 0) -#define XPRT_COPY_RECM (1 << 1) -#define XPRT_COPY_XID (1 << 2) -#define XPRT_COPY_DATA (1 << 3) +struct rpc_task; +struct rpc_xprt; + +struct rpc_xprt_ops { + void (*set_buffer_size)(struct rpc_xprt *xprt); + void (*connect)(struct rpc_task *task); + int (*send_request)(struct rpc_task *task); + void (*close)(struct rpc_xprt *xprt); + void (*destroy)(struct rpc_xprt *xprt); +}; struct rpc_xprt { + struct rpc_xprt_ops * ops; /* transport methods */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -199,14 +212,22 @@ struct rpc_xprt { wait_queue_head_t cong_wait; }; +#define XPRT_LAST_FRAG (1 << 0) +#define XPRT_COPY_RECM (1 << 1) +#define XPRT_COPY_XID (1 << 2) +#define XPRT_COPY_DATA (1 << 3) + #ifdef __KERNEL__ struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *toparms); +void xprt_disconnect(struct rpc_xprt *); int xprt_destroy(struct rpc_xprt *); void xprt_set_timeout(struct rpc_timeout *, unsigned int, unsigned long); - +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *, u32); +void xprt_complete_rqst(struct rpc_xprt *, + struct rpc_rqst *, int); void xprt_reserve(struct rpc_task *); int xprt_prepare_transmit(struct rpc_task *); void xprt_transmit(struct rpc_task *); @@ -214,7 +235,10 @@ void xprt_receive(struct rpc_task *); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *); void xprt_connect(struct rpc_task *); -void xprt_sock_setbufsize(struct rpc_xprt *); +int xs_setup_udp(struct rpc_xprt *, + struct rpc_timeout *); +int xs_setup_tcp(struct rpc_xprt *, + struct rpc_timeout *); #define XPRT_LOCKED 0 #define XPRT_CONNECT 1 -- cgit v1.2.3 From 9903cd1c27a1f30e8efea75e125be3b2002f7cb9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:26 -0400 Subject: [PATCH] RPC: transport switch function naming Introduce block header comments and a function naming convention to the socket transport implementation. Provide a debug setting for transports that is separate from RPCDBG_XPRT. Eliminate xprt_default_timeout(). Provide block comments for exposed interfaces in xprt.c, and eliminate the useless obvious comments. Convert printk's to dprintk's. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:04:04 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index eadb31e3c198..42d299747956 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -32,6 +32,7 @@ #define RPCDBG_AUTH 0x0010 #define RPCDBG_PMAP 0x0020 #define RPCDBG_SCHED 0x0040 +#define RPCDBG_TRANS 0x0080 #define RPCDBG_SVCSOCK 0x0100 #define RPCDBG_SVCDSP 0x0200 #define RPCDBG_MISC 0x0400 -- cgit v1.2.3 From 4a0f8c04f2ece949d54a0c4fd7490259cf23a58a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:32 -0400 Subject: [PATCH] RPC: Rename sock_lock Clean-up: replace a name reference to sockets in the generic parts of the RPC client by renaming sock_lock in the rpc_xprt structure. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:00 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d82b47ab73cb..c4f903f0e17c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -198,7 +198,7 @@ struct rpc_xprt { /* * Send stuff */ - spinlock_t sock_lock; /* lock socket info */ + spinlock_t transport_lock; /* lock transport info */ spinlock_t xprt_lock; /* lock xprt info */ struct rpc_task * snd_task; /* Task blocked in send */ -- cgit v1.2.3 From 5dc07727f86b25851e95193a0c484ea21b531c47 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:35 -0400 Subject: [PATCH] RPC: Rename xprt_lock Clean-up: Replace the xprt_lock with something more aptly named. This lock single-threads the XID and request slot reservation process. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:26 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c4f903f0e17c..41ce296dded1 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -199,7 +199,7 @@ struct rpc_xprt { * Send stuff */ spinlock_t transport_lock; /* lock transport info */ - spinlock_t xprt_lock; /* lock xprt info */ + spinlock_t reserve_lock; /* lock slot table */ struct rpc_task * snd_task; /* Task blocked in send */ struct list_head recv; -- cgit v1.2.3 From 2226feb6bcd0e5e117a9be3ea3dd3ffc14f3e41e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:38 -0400 Subject: [PATCH] RPC: rename the sockstate field Clean-up: get rid of a name reference to sockets in the generic parts of the RPC client by renaming the sockstate field in the rpc_xprt structure. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:05:53 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 60 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 11 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 41ce296dded1..009a3bb4f997 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -163,7 +163,7 @@ struct rpc_xprt { struct list_head free; /* free slots */ struct rpc_rqst * slot; /* slot table storage */ unsigned int max_reqs; /* total slots */ - unsigned long sockstate; /* Socket state */ + unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ resvport : 1, /* use a reserved port */ @@ -240,16 +240,54 @@ int xs_setup_udp(struct rpc_xprt *, int xs_setup_tcp(struct rpc_xprt *, struct rpc_timeout *); -#define XPRT_LOCKED 0 -#define XPRT_CONNECT 1 -#define XPRT_CONNECTING 2 - -#define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_test_and_clear_connected(xp) \ - (test_and_clear_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) +/* + * Reserved bit positions in xprt->state + */ +#define XPRT_LOCKED (0) +#define XPRT_CONNECTED (1) +#define XPRT_CONNECTING (2) + +static inline void xprt_set_connected(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void xprt_clear_connected(struct rpc_xprt *xprt) +{ + clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_connected(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_set_connected(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt) +{ + return test_and_clear_bit(XPRT_CONNECTED, &xprt->state); +} + +static inline void xprt_clear_connecting(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTING, &xprt->state); + smp_mb__after_clear_bit(); +} + +static inline int xprt_connecting(struct rpc_xprt *xprt) +{ + return test_bit(XPRT_CONNECTING, &xprt->state); +} + +static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt) +{ + return test_and_set_bit(XPRT_CONNECTING, &xprt->state); +} #endif /* __KERNEL__*/ -- cgit v1.2.3 From 44fbac2288dfed6f1963ac00bf922c3bcd779cd1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:44 -0400 Subject: [PATCH] RPC: Add helper for waking tasks pending on a transport Clean-up: remove only reference to xprt->pending from the socket transport implementation. This makes a cleaner interface for other transport implementations as well. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:06:52 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 009a3bb4f997..d5223993fca9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -232,6 +232,7 @@ void xprt_reserve(struct rpc_task *); int xprt_prepare_transmit(struct rpc_task *); void xprt_transmit(struct rpc_task *); void xprt_receive(struct rpc_task *); +void xprt_wake_pending_tasks(struct rpc_xprt *, int); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *); void xprt_connect(struct rpc_task *); -- cgit v1.2.3 From 55aa4f58aa43dc9a51fb80010630d94b96053a2e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:47 -0400 Subject: [PATCH] RPC: client-side transport switch cleanup Clean-up: change some comments to reflect the realities of the new RPC transport switch mechanism. Get rid of unused xprt_receive() prototype. Also, organize function prototypes in xprt.h by usage and scope. Test-plan: Compile kernel with CONFIG_NFS enabled. Version: Thu, 11 Aug 2005 16:07:21 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 61 ++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 26 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d5223993fca9..bfbc492ae36d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/sunrpc/clnt_xprt.h + * linux/include/linux/sunrpc/xprt.h * * Declarations for the RPC transport interface. * @@ -150,8 +150,8 @@ struct rpc_xprt { unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned int rcvsize, /* socket receive buffer size */ - sndsize; /* socket send buffer size */ + unsigned int rcvsize, /* transport rcv buffer size */ + sndsize; /* transport send buffer size */ size_t max_payload; /* largest RPC payload size, in bytes */ @@ -184,12 +184,12 @@ struct rpc_xprt { unsigned long tcp_copied, /* copied to request */ tcp_flags; /* - * Connection of sockets + * Connection of transports */ - struct work_struct sock_connect; + struct work_struct connect_worker; unsigned short port; /* - * Disconnection of idle sockets + * Disconnection of idle transports */ struct work_struct task_cleanup; struct timer_list timer; @@ -219,27 +219,36 @@ struct rpc_xprt { #ifdef __KERNEL__ -struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, - struct rpc_timeout *toparms); -void xprt_disconnect(struct rpc_xprt *); -int xprt_destroy(struct rpc_xprt *); -void xprt_set_timeout(struct rpc_timeout *, unsigned int, - unsigned long); -struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *, u32); -void xprt_complete_rqst(struct rpc_xprt *, - struct rpc_rqst *, int); -void xprt_reserve(struct rpc_task *); -int xprt_prepare_transmit(struct rpc_task *); -void xprt_transmit(struct rpc_task *); -void xprt_receive(struct rpc_task *); -void xprt_wake_pending_tasks(struct rpc_xprt *, int); +/* + * Transport operations used by ULPs + */ +struct rpc_xprt * xprt_create_proto(int proto, struct sockaddr_in *addr, struct rpc_timeout *to); +void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr); + +/* + * Generic internal transport functions + */ +void xprt_connect(struct rpc_task *task); +void xprt_reserve(struct rpc_task *task); +int xprt_prepare_transmit(struct rpc_task *task); +void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -void xprt_release(struct rpc_task *); -void xprt_connect(struct rpc_task *); -int xs_setup_udp(struct rpc_xprt *, - struct rpc_timeout *); -int xs_setup_tcp(struct rpc_xprt *, - struct rpc_timeout *); +void xprt_release(struct rpc_task *task); +int xprt_destroy(struct rpc_xprt *xprt); + +/* + * Transport switch helper functions + */ +void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); +struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); +void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); +void xprt_disconnect(struct rpc_xprt *xprt); + +/* + * Socket transport setup operations + */ +int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to); +int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); /* * Reserved bit positions in xprt->state -- cgit v1.2.3 From c7b2cae8a634015b72941ba2fc6c4bc9b8d3a129 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Aug 2005 16:25:50 -0400 Subject: [PATCH] RPC: separate TCP and UDP write space callbacks Split the socket write space callback function into a TCP version and UDP version, eliminating one dependence on the "xprt->stream" variable. Keep the common pieces of this path in xprt.c so other transports can use it too. Test-plan: Write-intensive workload on a single mount point. Version: Thu, 11 Aug 2005 16:07:51 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bfbc492ae36d..e73174c7e450 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -240,6 +240,8 @@ int xprt_destroy(struct rpc_xprt *xprt); * Transport switch helper functions */ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); +void xprt_wait_for_buffer_space(struct rpc_task *task); +void xprt_write_space(struct rpc_xprt *xprt); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); void xprt_disconnect(struct rpc_xprt *xprt); -- cgit v1.2.3 From 808012fbb23a52ec59352445d2076d175ad4ab26 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:49 -0700 Subject: [PATCH] RPC: skip over transport-specific heads automatically Add a generic mechanism for skipping over transport-specific headers when constructing an RPC request. This removes another "xprt->stream" dependency. Test-plan: Write-intensive workload on a single mount point (try both UDP and TCP). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 25 +++++++++++++++++++++++++ include/linux/sunrpc/xprt.h | 7 +++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 15f115332389..f43f237360ae 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -76,5 +76,30 @@ enum rpc_auth_stat { #define RPC_MAXNETNAMELEN 256 +/* + * From RFC 1831: + * + * "A record is composed of one or more record fragments. A record + * fragment is a four-byte header followed by 0 to (2**31) - 1 bytes of + * fragment data. The bytes encode an unsigned binary number; as with + * XDR integers, the byte order is from highest to lowest. The number + * encodes two values -- a boolean which indicates whether the fragment + * is the last fragment of the record (bit value 1 implies the fragment + * is the last fragment) and a 31-bit unsigned binary value which is the + * length in bytes of the fragment's data. The boolean value is the + * highest-order bit of the header; the length is the 31 low-order bits. + * (Note that this record specification is NOT in XDR standard form!)" + * + * The Linux RPC client always sends its requests in a single record + * fragment, limiting the maximum payload size for stream transports to + * 2GB. + */ + +typedef u32 rpc_fraghdr; + +#define RPC_LAST_STREAM_FRAGMENT (1U << 31) +#define RPC_FRAGMENT_SIZE_MASK (~RPC_LAST_STREAM_FRAGMENT) +#define RPC_MAX_FRAGMENT_SIZE ((1U << 31) - 1) + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e73174c7e450..966c456a0f6d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -155,6 +155,8 @@ struct rpc_xprt { size_t max_payload; /* largest RPC payload size, in bytes */ + unsigned int tsh_size; /* size of transport specific + header */ struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ @@ -236,6 +238,11 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release(struct rpc_task *task); int xprt_destroy(struct rpc_xprt *xprt); +static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) +{ + return p + xprt->tsh_size; +} + /* * Transport switch helper functions */ -- cgit v1.2.3 From 43118c29dea2b23798bd42a147015cceee7fa885 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:49 -0700 Subject: [PATCH] RPC: get rid of xprt->stream Now we can fix up the last few places that use the "xprt->stream" variable, and get rid of it from the rpc_xprt structure. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 966c456a0f6d..c9477f022efb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -168,8 +168,7 @@ struct rpc_xprt { unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ nocong : 1, /* no congestion control */ - resvport : 1, /* use a reserved port */ - stream : 1; /* TCP */ + resvport : 1; /* use a reserved port */ /* * XID -- cgit v1.2.3 From fe3aca290f17ae4978bd73d02aa4029f1c9c024c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:50 -0700 Subject: [PATCH] RPC: add API to set transport-specific timeouts Prepare the way to remove the "xprt->nocong" variable by adding a callout to the RPC client transport switch API to handle setting RPC retransmit timeouts. Add a pair of generic helper functions that provide the ability to set a simple fixed timeout, or to set a timeout based on the state of a round- trip estimator. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c9477f022efb..ac08e99a81cb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -134,6 +134,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); + void (*set_retrans_timeout)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -245,6 +246,8 @@ static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p) /* * Transport switch helper functions */ +void xprt_set_retrans_timeout_def(struct rpc_task *task); +void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); -- cgit v1.2.3 From 12a804698b29d040b7cdd92e8a44b0e75164dae9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:51 -0700 Subject: [PATCH] RPC: expose API for serializing access to RPC transports The next several patches introduce an API that allows transports to choose whether the RPC client provides congestion control or whether the transport itself provides it. The first method we abstract is the one that serializes access to the RPC transport to prevent the bytes from different requests from mingling together. This method provides proper request serialization and the opportunity to prevent new requests from being started because the transport is congested. The normal situation is for the transport to handle congestion control itself. Although NFS over UDP was first, it has been recognized after years of experience that having the transport provide congestion control is much better than doing it in the RPC client. Thus TCP, and probably every future transport implementation, will use the default method, xprt_lock_write, provided in xprt.c, which does not provide any kind of congestion control. UDP can continue using the xprt.c-provided Van Jacobson congestion avoidance implementation. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ac08e99a81cb..eee1c6877851 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -132,6 +132,7 @@ struct rpc_xprt; struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); + int (*reserve_xprt)(struct rpc_task *task); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); @@ -232,6 +233,8 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long */ void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); +int xprt_reserve_xprt(struct rpc_task *task); +int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); -- cgit v1.2.3 From 49e9a89086b3cae784a4868ca852863e4f4ea3fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:51 -0700 Subject: [PATCH] RPC: expose API for serializing access to RPC transports The next method we abstract is the one that releases a transport, allowing another task to have access to the transport. Again, one generic version of this is provided for transports that don't need the RPC client to perform congestion control, and one version is for transports that can use the original Van Jacobson implementation in xprt.c. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index eee1c6877851..86833b725bb5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,7 @@ struct rpc_xprt; struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); int (*reserve_xprt)(struct rpc_task *task); + void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); @@ -238,6 +239,8 @@ int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); +void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); +void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); int xprt_destroy(struct rpc_xprt *xprt); -- cgit v1.2.3 From 46c0ee8bc4ad3743de05e8b8b20201df44dcb6d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:52 -0700 Subject: [PATCH] RPC: separate xprt_timer implementations Allow transports to hook the retransmit timer interrupt. Some transports calculate their congestion window here so that a retransmit timeout has immediate effect on the congestion window. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 86833b725bb5..443c3f984cf9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -137,6 +137,7 @@ struct rpc_xprt_ops { void (*connect)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); + void (*timer)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -257,6 +258,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); +void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); void xprt_disconnect(struct rpc_xprt *xprt); -- cgit v1.2.3 From 1570c1e41eabf6b7031f3e4322a2cf1cbe319fee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:52 -0700 Subject: [PATCH] RPC: add generic interface for adjusting the congestion window A new interface that allows transports to adjust their congestion window using the Van Jacobson implementation in xprt.c is provided. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 443c3f984cf9..2e48752d55d9 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -258,9 +258,10 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); void xprt_wait_for_buffer_space(struct rpc_task *task); void xprt_write_space(struct rpc_xprt *xprt); +void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); -void xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied); +void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_disconnect(struct rpc_xprt *xprt); /* -- cgit v1.2.3 From a58dd398f5db4f73d5c581069fd70a4304cc4f0a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:53 -0700 Subject: [PATCH] RPC: add a release_rqst callout to the RPC transport switch The final place where congestion control state is adjusted is in xprt_release, where each request is finally released. Add a callout there to allow transports to perform additional processing when a request is about to be released. Test-plan: Use WAN simulation to cause sporadic bursty packet loss. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2e48752d55d9..64e77658fa30 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -138,6 +138,7 @@ struct rpc_xprt_ops { int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_task *task); + void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); }; @@ -262,6 +263,7 @@ void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid); void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); /* -- cgit v1.2.3 From ed63c003701a314c4893c11eceb9d68f8f46c662 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:53 -0700 Subject: [PATCH] RPC: remove xprt->nocong Get rid of the "xprt->nocong" variable. Test-plan: Use WAN simulation to cause sporadic bursty packet loss with UDP mounts. Look for significant regression in performance or client stability. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 64e77658fa30..559fb471f6f2 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -172,7 +172,6 @@ struct rpc_xprt { unsigned int max_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ - nocong : 1, /* no congestion control */ resvport : 1; /* use a reserved port */ /* -- cgit v1.2.3 From 555ee3af161b037865793bd4bebc06b58daafde6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:54 -0700 Subject: [PATCH] RPC: clean up after nocong was removed Clean-up: Move some macros that are specific to the Van Jacobson implementation into xprt.c. Get rid of the cong_wait field in rpc_xprt, which is no longer used. Get rid of xprt_clear_backlog. Test-plan: Compile with CONFIG_NFS enabled. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 559fb471f6f2..dcf0326bda01 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -15,20 +15,6 @@ #include #include -/* - * The transport code maintains an estimate on the maximum number of out- - * standing RPC requests, using a smoothed version of the congestion - * avoidance implemented in 44BSD. This is basically the Van Jacobson - * congestion algorithm: If a retransmit occurs, the congestion window is - * halved; otherwise, it is incremented by 1/cwnd when - * - * - a reply is received and - * - a full number of requests are outstanding and - * - the congestion window hasn't been updated recently. - * - * Upper procedures may check whether a request would block waiting for - * a free RPC slot by using the RPC_CONGESTED() macro. - */ extern unsigned int xprt_udp_slot_table_entries; extern unsigned int xprt_tcp_slot_table_entries; @@ -36,12 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -#define RPC_CWNDSHIFT (8U) -#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) -#define RPC_INITCWND RPC_CWNDSCALE -#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) -#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) - /* Default timeout values */ #define RPC_MAX_UDP_TIMEOUT (60*HZ) #define RPC_MAX_TCP_TIMEOUT (600*HZ) @@ -213,8 +193,6 @@ struct rpc_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); - - wait_queue_head_t cong_wait; }; #define XPRT_LAST_FRAG (1 << 0) -- cgit v1.2.3 From 529b33c6db0120126b1381faa51406dc463acdc9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:54 -0700 Subject: [PATCH] RPC: allow RPC client's port range to be adjustable Select an RPC client source port between 650 and 1023 instead of between 1 and 800. The old range conflicts with a number of network services. Provide sysctls to allow admins to select a different port range. Note that this doesn't affect user-level RPC library behavior, which still uses 1 to 800. Based on a suggestion by Olaf Kirch . Test-plan: Repeated mount and unmount. Destructive testing. Idle timeouts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/debug.h | 2 ++ include/linux/sunrpc/xprt.h | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 42d299747956..1a42d902bc11 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -95,6 +95,8 @@ enum { CTL_NLMDEBUG, CTL_SLOTTABLE_UDP, CTL_SLOTTABLE_TCP, + CTL_MIN_RESVPORT, + CTL_MAX_RESVPORT, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index dcf0326bda01..9d9266cf8a36 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -51,6 +51,17 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_CALLHDRSIZE 6 #define RPC_REPHDRSIZE 4 +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (650U) +#define RPC_DEF_MAX_RESVPORT (1023U) + /* * This describes a timeout strategy */ @@ -62,6 +73,9 @@ struct rpc_timeout { unsigned char to_exponential; }; +struct rpc_task; +struct rpc_xprt; + /* * This describes a complete RPC request */ @@ -107,9 +121,6 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len -struct rpc_task; -struct rpc_xprt; - struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt); int (*reserve_xprt)(struct rpc_task *task); -- cgit v1.2.3 From 03bf4b707eee06706c9db343dd5c905b7ee47ed2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:55 -0700 Subject: [PATCH] RPC: parametrize various transport connect timeouts Each transport implementation can now set unique bind, connect, reestablishment, and idle timeout values. These are variables, allowing the values to be modified dynamically. This permits exponential backoff of any of these values, for instance. As an example, we implement exponential backoff for the connection reestablishment timeout. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9d9266cf8a36..2543adf18551 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -22,28 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries; #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) -/* Default timeout values */ -#define RPC_MAX_UDP_TIMEOUT (60*HZ) -#define RPC_MAX_TCP_TIMEOUT (600*HZ) - -/* - * Wait duration for an RPC TCP connection to be established. Solaris - * NFS over TCP uses 60 seconds, for example, which is in line with how - * long a server takes to reboot. - */ -#define RPC_CONNECT_TIMEOUT (60*HZ) - -/* - * Delay an arbitrary number of seconds before attempting to reconnect - * after an error. - */ -#define RPC_REESTABLISH_TIMEOUT (15*HZ) - -/* - * RPC transport idle timeout. - */ -#define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) - /* * RPC call and reply header size as number of 32bit words (verifier * size computed separately) @@ -182,14 +160,19 @@ struct rpc_xprt { /* * Connection of transports */ + unsigned long connect_timeout, + bind_timeout, + reestablish_timeout; struct work_struct connect_worker; unsigned short port; + /* * Disconnection of idle transports */ struct work_struct task_cleanup; struct timer_list timer; - unsigned long last_used; + unsigned long last_used, + idle_timeout; /* * Send stuff -- cgit v1.2.3 From 470056c288334eb0b37be26c9ff8aee37ed1cc7a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Aug 2005 16:25:56 -0700 Subject: [PATCH] RPC: rationalize set_buffer_size In fact, ->set_buffer_size should be completely functionless for non-UDP. Test-plan: Check socket buffer size on UDP sockets over time. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2543adf18551..99cad3ead81d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -100,7 +100,7 @@ struct rpc_rqst { #define rq_slen rq_snd_buf.len struct rpc_xprt_ops { - void (*set_buffer_size)(struct rpc_xprt *xprt); + void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); @@ -124,7 +124,7 @@ struct rpc_xprt { unsigned long cong; /* current congestion */ unsigned long cwnd; /* congestion window */ - unsigned int rcvsize, /* transport rcv buffer size */ + size_t rcvsize, /* transport rcv buffer size */ sndsize; /* transport send buffer size */ size_t max_payload; /* largest RPC payload size, -- cgit v1.2.3 From 278c995c8a153bb2a9bc427e931cfb9c8034c9d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Jul 2005 23:53:01 +0100 Subject: [PATCH] RPC,NFS: new rpc_pipefs patch Currently rpc_mkdir/rpc_rmdir and rpc_mkpipe/mk_unlink have an API that's a little unfortunate. They take a path relative to the rpc_pipefs root and thus need to perform a full lookup. If you look at debugfs or usbfs they always store the dentry for directories they created and thus can pass in a dentry + single pathname component pair into their equivalents of the above functions. And in fact rpc_pipefs actually stores a dentry for all but one component so this change not only simplifies the core rpc_pipe code but also the callers. Unfortuntately this code path is only used by the NFS4 idmapper and AUTH_GSSAPI for which I don't have a test enviroment. Could someone give it a spin? It's the last bit needed before we can rework the lookup_hash API Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab151bbb66df..b5b51c196690 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -59,7 +59,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - char cl_pathname[30];/* Path in rpc_pipe_fs */ + struct dentry * __cl_parent_dentry; struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63929349571f..63878d05c9a9 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -41,10 +41,11 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); -extern int rpc_rmdir(char *); -extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); -extern int rpc_unlink(char *); +extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *); +extern void rpc_rmdir(struct dentry *); +extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *, + struct rpc_pipe_ops *, int flags); +extern void rpc_unlink(struct dentry *); #endif #endif -- cgit v1.2.3 From f134585a7343d71f9be7f0cf97e2145f21dd10c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Sep 2005 11:08:25 -0400 Subject: Revert "[PATCH] RPC,NFS: new rpc_pipefs patch" This reverts 17f4e6febca160a9f9dd4bdece9784577a2f4524 commit. --- include/linux/sunrpc/clnt.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b5b51c196690..ab151bbb66df 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -59,7 +59,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - struct dentry * __cl_parent_dentry; + char cl_pathname[30];/* Path in rpc_pipe_fs */ struct dentry * cl_dentry; /* inode */ struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 63878d05c9a9..63929349571f 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -41,11 +41,10 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *); -extern void rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *, - struct rpc_pipe_ops *, int flags); -extern void rpc_unlink(struct dentry *); +extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); +extern int rpc_rmdir(char *); +extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags); +extern int rpc_unlink(char *); #endif #endif -- cgit v1.2.3 From 5e5ce5be6f0161d2a069a4f8a1154fe639c5c02f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:11 -0700 Subject: RPC: allow call_encode() to delay transmission of an RPC call. Currently, call_encode will cause the entire RPC call to abort if it returns an error. This is unnecessarily rigid, and gets in the way of attempts to allow the NFSv4 layer to order RPC calls that carry sequence ids. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 99cad3ead81d..068e1fb0868b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -211,6 +211,7 @@ int xprt_reserve_xprt(struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); +void xprt_abort_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From 293f1eb551a77fe5c8956a559a3c0baea95cd9bc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:37 -0400 Subject: SUNRPC: Add support for privacy to generic gss-api code. Add support for privacy to generic gss-api code. This is dead code until we have both a mechanism that supports privacy and code in the client or server that uses it. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 689262f63059..e896752ffbf9 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -48,6 +48,17 @@ u32 gss_verify_mic( struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); +u32 gss_wrap( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); +u32 gss_unwrap( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -93,6 +104,17 @@ struct gss_api_ops { struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); + u32 (*gss_wrap)( + struct gss_ctx *ctx_id, + u32 qop, + int offset, + struct xdr_buf *outbuf, + struct page **inpages); + u32 (*gss_unwrap)( + struct gss_ctx *ctx_id, + u32 *qop, + int offset, + struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); }; -- cgit v1.2.3 From ead5e1c26fdcd969cf40c49cb0589d56879d240d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:43 -0400 Subject: SUNRPC: Provide a callback to allow free pages allocated during xdr encoding For privacy, we need to allocate pages to store the encrypted data (passed in pages can't be used without the risk of corrupting data in the page cache). So we need a way to free that memory after the request has been transmitted. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 068e1fb0868b..3b8b6e823c70 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -73,7 +73,10 @@ struct rpc_rqst { int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ u32 rq_seqno; /* gss seq no. used on req. */ - + int rq_enc_pages_num; + struct page **rq_enc_pages; /* scratch pages for use by + gss privacy code */ + void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; struct xdr_buf rq_private_buf; /* The receive buffer -- cgit v1.2.3 From 24b2605becc10ca63c4c30808fa59a8abbf68727 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:54:53 -0400 Subject: RPCSEC_GSS: cleanup au_rslack calculation Various xdr encode routines use au_rslack to guess where the reply argument will end up, so we can set up the xdr_buf to recieve data into the right place for zero copy. Currently we calculate the au_rslack estimate when we check the verifier. Normally this only depends on the verifier size. In the integrity case we add a few bytes to allow for a length and sequence number. It's a bit simpler to calculate only the verifier size when we check the verifier, and delay the full calculation till we unwrap. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 04ebc24db348..b68c11a2d6dd 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -66,7 +66,12 @@ struct rpc_cred_cache { struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ - unsigned int au_rslack; /* reply verf size guess */ + /* guess at number of u32's auth adds before + * reply data; normally the verifier size: */ + unsigned int au_rslack; + /* for gss, used to calculate au_rslack: */ + unsigned int au_verfsize; + unsigned int au_flags; /* various flags */ struct rpc_authops * au_ops; /* operations */ rpc_authflavor_t au_flavor; /* pseudoflavor (note may -- cgit v1.2.3 From bfa91516b57483fc9c81d8d90325fd2c3c16ac48 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:08 -0400 Subject: RPCSEC_GSS: krb5 pre-privacy cleanup The code this was originally derived from processed wrap and mic tokens using the same functions. This required some contortions, and more would be required with the addition of xdr_buf's, so it's better to separate out the two code paths. In preparation for adding privacy support, remove the last vestiges of the old wrap token code. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index ffe31d2eb9ec..cb35833e2ae3 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -121,13 +121,12 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer, int toktype); + struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, - int *qop_state, int toktype); + struct xdr_buf *message_buffer, int *qop_state); u32 krb5_encrypt(struct crypto_tfm * key, -- cgit v1.2.3 From 14ae162c24d985593d5b19437d7f3d8fd0062b59 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:13 -0400 Subject: RPCSEC_GSS: Add support for privacy to krb5 rpcsec_gss mechanism. Add support for privacy to the krb5 rpcsec_gss mechanism. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index cb35833e2ae3..7f93c2d5ebdb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -116,7 +116,7 @@ enum seal_alg { s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, @@ -128,6 +128,15 @@ krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, struct xdr_buf *message_buffer, int *qop_state); +u32 +gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, + struct xdr_buf *outbuf, struct page **pages); + +u32 +gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, + struct xdr_buf *buf); + + u32 krb5_encrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); @@ -136,6 +145,13 @@ u32 krb5_decrypt(struct crypto_tfm * key, void *iv, void *in, void *out, int length); +int +gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *outbuf, int offset, + struct page **pages); + +int +gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *inbuf, int offset); + s32 krb5_make_seq_num(struct crypto_tfm * key, int direction, -- cgit v1.2.3 From 00fd6e14255fe7a249315746386d640bc4e9e758 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:18 -0400 Subject: RPCSEC_GSS remove all qop parameters Not only are the qop parameters that are passed around throughout the gssapi unused by any currently implemented mechanism, but there appears to be some doubt as to whether they will ever be used. Let's just kill them off for now. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_api.h | 13 ++----------- include/linux/sunrpc/gss_err.h | 10 ---------- include/linux/sunrpc/gss_krb5.h | 8 ++++---- include/linux/sunrpc/gss_spkm3.h | 4 ++-- 4 files changed, 8 insertions(+), 27 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e896752ffbf9..9b8bcf125c18 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -40,23 +40,19 @@ int gss_import_sec_context( struct gss_ctx **ctx_id); u32 gss_get_mic( struct gss_ctx *ctx_id, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token); u32 gss_verify_mic( struct gss_ctx *ctx_id, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate); + struct xdr_netobj *mic_token); u32 gss_wrap( struct gss_ctx *ctx_id, - u32 qop, int offset, struct xdr_buf *outbuf, struct page **inpages); u32 gss_unwrap( struct gss_ctx *ctx_id, - u32 *qop, int offset, struct xdr_buf *inbuf); u32 gss_delete_sec_context( @@ -67,7 +63,6 @@ char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { u32 pseudoflavor; - u32 qop; u32 service; char *name; char *auth_domain_name; @@ -96,23 +91,19 @@ struct gss_api_ops { struct gss_ctx *ctx_id); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, - u32 qop, struct xdr_buf *message, struct xdr_netobj *mic_token); u32 (*gss_verify_mic)( struct gss_ctx *ctx_id, struct xdr_buf *message, - struct xdr_netobj *mic_token, - u32 *qstate); + struct xdr_netobj *mic_token); u32 (*gss_wrap)( struct gss_ctx *ctx_id, - u32 qop, int offset, struct xdr_buf *outbuf, struct page **inpages); u32 (*gss_unwrap)( struct gss_ctx *ctx_id, - u32 *qop, int offset, struct xdr_buf *buf); void (*gss_delete_sec_context)( diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index 92608a2e574c..a6807867bd21 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h @@ -65,16 +65,6 @@ typedef unsigned int OM_uint32; #define GSS_C_MECH_CODE 2 -/* - * Define the default Quality of Protection for per-message services. Note - * that an implementation that offers multiple levels of QOP may either reserve - * a value (for example zero, as assumed here) to mean "default protection", or - * alternatively may simply equate GSS_C_QOP_DEFAULT to a specific explicit - * QOP value. However a value of 0 should always be interpreted by a GSSAPI - * implementation as a request for the default protection level. - */ -#define GSS_C_QOP_DEFAULT 0 - /* * Expiration time of 2^32-1 seconds means infinite lifetime for a * credential or security context diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 7f93c2d5ebdb..a7bda4edb853 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -119,21 +119,21 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum); u32 -krb5_make_token(struct krb5_ctx *context_handle, int qop_req, +krb5_make_token(struct krb5_ctx *context_handle, struct xdr_buf *input_message_buffer, struct xdr_netobj *output_message_buffer); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer, int *qop_state); + struct xdr_buf *message_buffer); u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, u32 qop, int offset, +gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *outbuf, struct page **pages); u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, u32 *qop, int offset, +gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, struct xdr_buf *buf); diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index b5c9968c3c17..0beb2cf00a84 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -41,9 +41,9 @@ struct spkm3_ctx { #define SPKM_WRAP_TOK 5 #define SPKM_DEL_TOK 6 -u32 spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, struct xdr_buf * text, struct xdr_netobj * token, int toktype); +u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_netobj * token, int toktype); -u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int *qop_state, int toktype); +u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype); #define CKSUMTYPE_RSA_MD5 0x0007 -- cgit v1.2.3 From a0857d03b21fa54653c9d2fe7a315381176015b4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 13 Oct 2005 16:55:23 -0400 Subject: RPCSEC_GSS: krb5 cleanup Remove some senseless wrappers. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_krb5.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/sunrpc') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index a7bda4edb853..2c3601d31045 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -118,15 +118,11 @@ s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, int body_offset, struct xdr_netobj *cksum); -u32 -krb5_make_token(struct krb5_ctx *context_handle, - struct xdr_buf *input_message_buffer, - struct xdr_netobj *output_message_buffer); +u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); -u32 -krb5_read_token(struct krb5_ctx *context_handle, - struct xdr_netobj *input_token_buffer, - struct xdr_buf *message_buffer); +u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, + struct xdr_netobj *); u32 gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, -- cgit v1.2.3