diff options
author | Brian Behlendorf <behlendorf1@llnl.gov> | 2014-04-27 13:06:47 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-04-27 10:24:54 -0700 |
commit | 44779340c6dcb8c858955c02b3aeef7d6b28a684 (patch) | |
tree | bfbd8f6ec331e58db7df23d342f46ed8f7b044fb /drivers/staging/lustre/lustre/lmv | |
parent | e69cd00cb3b3ce49954947ee3f78ba887d7ac8d0 (diff) |
staging/lustre: Limit reply buffer size
When allocating a reply buffer for the striping information don't
assume the unlikely worst case. Instead, assume the common case
and size the buffer based on the observed default ea/cookie size.
The default size is initialized to a single stripe and allowed to
grow up to an entire page if needed. This means that for smallish
filesystems (less than ~21 OSTs) where the worst case striping
information can fit in a single page there is effectively no
change. Only for larger filesystem will the default be less than
the maximum. This has a number of advantages.
* By limiting the default reply buffer size we avoid always
vmalloc()'ing the buffer because it exceeds four pages in size
and instead kmalloc() it. This prevents the client from
thrashing on the global vmalloc() spin lock.
* A reply buffer of exactly the right size (no larger) is allocated
in the overflow case. These larger reply buffers are still
unlikely to exceed the 16k limit where a vmalloc() will occur.
* Saves memory in the common case. Wide-striped files exceeding
the default are expected to be the exception.
This patch works because the ptlrpc layer is smart
enough to reallocate the reply buffer when an overflow occurs.
Therefore the client doesn't have to drop the incoming reply and
send a new request with a larger reply buffer.
It's also worth mentioning that reply buffers always contain
a significant amount of extra padding because they are rounded up
to the nearest power of two. This means that even files striped
wider than the default have a good chance of fitting in the
allocated reply buffer.
Also remove the client eadatasize check in mdt xattr packing because,
as noted above, the client can handle -EOVERFLOW.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-on: http://review.whamcloud.com/6339
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3338
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/staging/lustre/lustre/lmv')
-rw-r--r-- | drivers/staging/lustre/lustre/lmv/lmv_obd.c | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c index 3ba0a0a1d945..4edf8a31221c 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c @@ -280,7 +280,7 @@ static void lmv_set_timeouts(struct obd_device *obd) } static int lmv_init_ea_size(struct obd_export *exp, int easize, - int def_easize, int cookiesize) + int def_easize, int cookiesize, int def_cookiesize) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -300,6 +300,10 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, lmv->max_cookiesize = cookiesize; change = 1; } + if (lmv->max_def_cookiesize < def_cookiesize) { + lmv->max_def_cookiesize = def_cookiesize; + change = 1; + } if (change == 0) return 0; @@ -315,7 +319,7 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, } rc = md_init_ea_size(lmv->tgts[i]->ltd_exp, easize, def_easize, - cookiesize); + cookiesize, def_cookiesize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d:" " rc = %d.\n", obd->obd_name, i, rc); @@ -400,8 +404,8 @@ int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) tgt->ltd_exp = mdc_exp; lmv->desc.ld_active_tgt_count++; - md_init_ea_size(tgt->ltd_exp, lmv->max_easize, - lmv->max_def_easize, lmv->max_cookiesize); + md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize, + lmv->max_cookiesize, lmv->max_def_cookiesize); CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, @@ -527,9 +531,8 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, spin_unlock(&lmv->lmv_lock); } else { int easize = sizeof(struct lmv_stripe_md) + - lmv->desc.ld_tgt_count * - sizeof(struct lu_fid); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); + lmv->desc.ld_tgt_count * sizeof(struct lu_fid); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); } } @@ -578,7 +581,7 @@ 
int lmv_check_connect(struct obd_device *obd) class_export_put(lmv->exp); lmv->connected = 1; easize = lmv_get_easize(lmv); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); lmv_init_unlock(lmv); return 0; @@ -2340,7 +2343,11 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, return 0; } return -EINVAL; - } else if (KEY_IS(KEY_MAX_EASIZE) || KEY_IS(KEY_CONN_DATA)) { + } else if (KEY_IS(KEY_MAX_EASIZE) || + KEY_IS(KEY_DEFAULT_EASIZE) || + KEY_IS(KEY_MAX_COOKIESIZE) || + KEY_IS(KEY_DEFAULT_COOKIESIZE) || + KEY_IS(KEY_CONN_DATA)) { rc = lmv_check_connect(obd); if (rc) return rc; |