summaryrefslogtreecommitdiff
path: root/drivers/staging/lustre/lustre/mdc/mdc_request.c
diff options
context:
space:
mode:
authorBrian Behlendorf <behlendorf1@llnl.gov>2014-04-27 13:06:47 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-04-27 10:24:54 -0700
commit44779340c6dcb8c858955c02b3aeef7d6b28a684 (patch)
treebfbd8f6ec331e58db7df23d342f46ed8f7b044fb /drivers/staging/lustre/lustre/mdc/mdc_request.c
parente69cd00cb3b3ce49954947ee3f78ba887d7ac8d0 (diff)
staging/lustre: Limit reply buffer size
When allocating a reply buffer for the striping information don't assume the unlikely worst case. Instead, assume the common case and size the buffer based on the observed default ea/cookie size. The default size is initialized to a single stripe and allowed to grow up to an entire page if needed. This means that for smallish filesystems (less than ~21 OSTs) where the worst case striping information can fit in a single page there is effectively no change. Only for larger filesystems will the default be less than the maximum. This has a number of advantages. * By limiting the default reply buffer size we avoid always vmalloc()'ing the buffer because it exceeds four pages in size and instead kmalloc() it. This prevents the client from thrashing on the global vmalloc() spin lock. * A reply buffer of exactly the right size (no larger) is allocated in the overflow case. These larger reply buffers are still unlikely to exceed the 16k limit where a vmalloc() will occur. * Saves memory in the common case. Wide striped files exceeding the default are expected to be the exception. The reason this patch works is that the ptlrpc layer is smart enough to reallocate the reply buffer when an overflow occurs. Therefore the client doesn't have to drop the incoming reply and send a new request with a larger reply buffer. It's also worth mentioning that reply buffers always contain a significant amount of extra padding because they are rounded up to the nearest power of two. This means that even files striped wider than the default have a good chance of fitting in the allocated reply buffer. Also remove the client eadatasize check in mdt xattr packing because, as said above, the client can handle -EOVERFLOW.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Lai Siyao <lai.siyao@intel.com> Reviewed-on: http://review.whamcloud.com/6339 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3338 Reviewed-by: James Simmons <uja.ornl@gmail.com> Reviewed-by: Andreas Dilger <andreas.dilger@intel.com> Reviewed-by: Bob Glossman <bob.glossman@intel.com> Signed-off-by: Oleg Drokin <oleg.drokin@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging/lustre/lustre/mdc/mdc_request.c')
-rw-r--r--drivers/staging/lustre/lustre/mdc/mdc_request.c49
1 files changed, 42 insertions, 7 deletions
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index bde9f93c149b..fca43cf1d671 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -903,9 +903,9 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
mdc_close_pack(req, op_data);
req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
- obd->u.cli.cl_max_mds_easize);
+ obd->u.cli.cl_default_mds_easize);
req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER,
- obd->u.cli.cl_max_mds_cookiesize);
+ obd->u.cli.cl_default_mds_cookiesize);
ptlrpc_request_set_replen(req);
@@ -2153,12 +2153,40 @@ int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
if (*vallen != sizeof(int))
return -EINVAL;
- mdsize = *(int*)val;
+ mdsize = *(int *)val;
if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
max_easize = val;
*max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
return 0;
+ } else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
+ int *default_easize;
+
+ if (*vallen != sizeof(int))
+ return -EINVAL;
+ default_easize = val;
+ *default_easize = exp->exp_obd->u.cli.cl_default_mds_easize;
+ return 0;
+ } else if (KEY_IS(KEY_MAX_COOKIESIZE)) {
+ int mdsize, *max_cookiesize;
+
+ if (*vallen != sizeof(int))
+ return -EINVAL;
+ mdsize = *(int *)val;
+ if (mdsize > exp->exp_obd->u.cli.cl_max_mds_cookiesize)
+ exp->exp_obd->u.cli.cl_max_mds_cookiesize = mdsize;
+ max_cookiesize = val;
+ *max_cookiesize = exp->exp_obd->u.cli.cl_max_mds_cookiesize;
+ return 0;
+ } else if (KEY_IS(KEY_DEFAULT_COOKIESIZE)) {
+ int *default_cookiesize;
+
+ if (*vallen != sizeof(int))
+ return -EINVAL;
+ default_cookiesize = val;
+ *default_cookiesize =
+ exp->exp_obd->u.cli.cl_default_mds_cookiesize;
+ return 0;
} else if (KEY_IS(KEY_CONN_DATA)) {
struct obd_import *imp = class_exp2cliimp(exp);
struct obd_connect_data *data = val;
@@ -2439,11 +2467,15 @@ err_rpc_lock:
}
/* Initialize the default and maximum LOV EA and cookie sizes. This allows
- * us to make MDS RPCs with large enough reply buffers to hold the
- * maximum-sized (= maximum striped) EA and cookie without having to
- * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */
+ * us to make MDS RPCs with large enough reply buffers to hold a default
+ * sized EA and cookie without having to calculate this (via a call into the
+ * LOV + OSCs) each time we make an RPC. The maximum size is also tracked
+ * but not used to avoid wastefully vmalloc()'ing large reply buffers when
+ * a large number of stripes is possible. If a larger reply buffer is
+ * required it will be reallocated in the ptlrpc layer due to overflow.
+ */
static int mdc_init_ea_size(struct obd_export *exp, int easize,
- int def_easize, int cookiesize)
+ int def_easize, int cookiesize, int def_cookiesize)
{
struct obd_device *obd = exp->exp_obd;
struct client_obd *cli = &obd->u.cli;
@@ -2457,6 +2489,9 @@ static int mdc_init_ea_size(struct obd_export *exp, int easize,
if (cli->cl_max_mds_cookiesize < cookiesize)
cli->cl_max_mds_cookiesize = cookiesize;
+ if (cli->cl_default_mds_cookiesize < def_cookiesize)
+ cli->cl_default_mds_cookiesize = def_cookiesize;
+
return 0;
}