From 8fc91fd85950d106883852c6d215614ec28cc92d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 6 Oct 2009 11:31:13 -0700 Subject: ceph: message pools The msgpool is a basic mempool_t-like structure to preallocate messages we expect to receive over the wire. This ensures we have the necessary memory preallocated to process replies to requests, or to process unsolicited messages from various servers. Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 fs/ceph/msgpool.c (limited to 'fs/ceph/msgpool.c') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c new file mode 100644 index 000000000000..39d4d7ed82ce --- /dev/null +++ b/fs/ceph/msgpool.c @@ -0,0 +1,167 @@ +#include "ceph_debug.h" + +#include +#include +#include +#include + +#include "msgpool.h" + +/* + * We use msg pools to preallocate memory for messages we expect to + * receive over the wire, to avoid getting ourselves into OOM + * conditions at unexpected times. We take use a few different + * strategies: + * + * - for request/response type interactions, we preallocate the + * memory needed for the response when we generate the request. + * + * - for messages we can receive at any time from the MDS, we preallocate + * a pool of messages we can re-use. + * + * - for writeback, we preallocate some number of messages to use for + * requests and their replies, so that we always make forward + * progress. + * + * The msgpool behaves like a mempool_t, but keeps preallocated + * ceph_msgs strung together on a list_head instead of using a pointer + * vector. This avoids vector reallocation when we adjust the number + * of preallocated items (which happens frequently). + */ + + +/* + * Allocate or release as necessary to meet our target pool size. + */ +static int __fill_msgpool(struct ceph_msgpool *pool) +{ + struct ceph_msg *msg; + + while (pool->num < pool->min) { + dout("fill_msgpool %p %d/%d allocating\n", pool, pool->num, + pool->min); + spin_unlock(&pool->lock); + msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + spin_lock(&pool->lock); + if (IS_ERR(msg)) + return PTR_ERR(msg); + msg->pool = pool; + list_add(&msg->list_head, &pool->msgs); + pool->num++; + } + while (pool->num > pool->min) { + msg = list_first_entry(&pool->msgs, struct ceph_msg, list_head); + dout("fill_msgpool %p %d/%d releasing %p\n", pool, pool->num, + pool->min, msg); + list_del_init(&msg->list_head); + pool->num--; + ceph_msg_kfree(msg); + } + return 0; +} + +int ceph_msgpool_init(struct ceph_msgpool *pool, + int front_len, int min, bool blocking) +{ + int ret; + + dout("msgpool_init %p front_len %d min %d\n", pool, front_len, min); + spin_lock_init(&pool->lock); + pool->front_len = front_len; + INIT_LIST_HEAD(&pool->msgs); + pool->num = 0; + pool->min = min; + pool->blocking = blocking; + init_waitqueue_head(&pool->wait); + + spin_lock(&pool->lock); + ret = __fill_msgpool(pool); + spin_unlock(&pool->lock); + return ret; +} + +void ceph_msgpool_destroy(struct ceph_msgpool *pool) +{ + dout("msgpool_destroy %p\n", pool); + spin_lock(&pool->lock); + pool->min = 0; + __fill_msgpool(pool); + spin_unlock(&pool->lock); +} + +int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) +{ + int ret; + + spin_lock(&pool->lock); + dout("msgpool_resv %p delta %d\n", pool, delta); + pool->min += delta; + ret = __fill_msgpool(pool); + spin_unlock(&pool->lock); + return ret; +} + +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool) +{ + wait_queue_t wait; + struct ceph_msg *msg; + + if (pool->blocking) { + /* mempool_t behavior; first try to alloc */ + msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + if (!IS_ERR(msg)) + return msg; + } + + while (1) { + spin_lock(&pool->lock); + if (likely(pool->num)) { + msg = list_entry(pool->msgs.next, struct ceph_msg, + list_head); + list_del_init(&msg->list_head); + pool->num--; + dout("msgpool_get %p got %p, now %d/%d\n", pool, msg, + pool->num, pool->min); + spin_unlock(&pool->lock); + return msg; + } + pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, + pool->min, pool->blocking ? "waiting" : "failing"); + spin_unlock(&pool->lock); + + if (!pool->blocking) { + WARN_ON(1); + + /* maybe we can allocate it now? */ + msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + if (!IS_ERR(msg)) + return msg; + + return ERR_PTR(-ENOMEM); + } + + init_wait(&wait); + prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&pool->wait, &wait); + } +} + +void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) +{ + spin_lock(&pool->lock); + if (pool->num < pool->min) { + ceph_msg_get(msg); /* retake a single ref */ + list_add(&msg->list_head, &pool->msgs); + pool->num++; + dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, + pool->num, pool->min); + spin_unlock(&pool->lock); + wake_up(&pool->wait); + } else { + dout("msgpool_put %p drop %p, at %d/%d\n", pool, msg, + pool->num, pool->min); + spin_unlock(&pool->lock); + ceph_msg_kfree(msg); + } +} -- cgit v1.2.3 From 8f3bc053c610826a657714649ea596f07875db2e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 14 Oct 2009 17:36:07 -0700 Subject: ceph: warn on allocation from msgpool with larger front_len Pass the front_len we need when pulling a message off a msgpool, and WARN if it is greater than the pool's size. Then try to allocate a new message (to continue without failing). Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'fs/ceph/msgpool.c') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 39d4d7ed82ce..7599b3382076 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -101,14 +101,28 @@ int ceph_msgpool_resv(struct ceph_msgpool *pool, int delta) return ret; } -struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool) +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { wait_queue_t wait; struct ceph_msg *msg; + if (front_len && front_len > pool->front_len) { + pr_err("msgpool_get pool %p need front %d, pool size is %d\n", + pool, front_len, pool->front_len); + WARN_ON(1); + + /* try to alloc a fresh message */ + msg = ceph_msg_new(0, front_len, 0, 0, NULL); + if (!IS_ERR(msg)) + return msg; + } + + if (!front_len) + front_len = pool->front_len; + if (pool->blocking) { /* mempool_t behavior; first try to alloc */ - msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + msg = ceph_msg_new(0, front_len, 0, 0, NULL); if (!IS_ERR(msg)) return msg; } @@ -133,7 +147,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool) WARN_ON(1); /* maybe we can allocate it now? */ - msg = ceph_msg_new(0, pool->front_len, 0, 0, NULL); + msg = ceph_msg_new(0, front_len, 0, 0, NULL); if (!IS_ERR(msg)) return msg; -- cgit v1.2.3 From c2e552e76e2c6907ca50cd9a4b747a2e2e8c615e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 Dec 2009 15:55:05 -0800 Subject: ceph: use kref for ceph_msg Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ceph/msgpool.c') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 7599b3382076..ad5482c0267b 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -165,7 +165,7 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { spin_lock(&pool->lock); if (pool->num < pool->min) { - ceph_msg_get(msg); /* retake a single ref */ + kref_set(&msg->kref, 1); /* retake a single ref */ list_add(&msg->list_head, &pool->msgs); pool->num++; dout("msgpool_put %p reclaim %p, now %d/%d\n", pool, msg, -- cgit v1.2.3 From 0cf90ab5b075821940873e73cdbfeb8edc3dabe8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 22 Dec 2009 10:45:18 -0800 Subject: ceph: more informative msgpool errors Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ceph/msgpool.c') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index ad5482c0267b..2f04e0fc4666 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -140,7 +140,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) return msg; } pr_err("msgpool_get %p now %d/%d, %s\n", pool, pool->num, - pool->min, pool->blocking ? "waiting" : "failing"); + pool->min, pool->blocking ? "waiting" : "may fail"); spin_unlock(&pool->lock); if (!pool->blocking) { @@ -151,6 +151,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) if (!IS_ERR(msg)) return msg; + pr_err("msgpool_get %p empty + alloc failed\n", pool); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 3ca02ef96e119d36bc1752baeae7dd0c59c2f325 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 1 Mar 2010 15:25:00 -0800 Subject: ceph: reset front len on return to msgpool; BUG on mismatched front iov Reset msg front len when a message is returned to the pool: the caller may have changed it. BUG if we try to send a message with a hdr.front_len that doesn't match the front iov. Signed-off-by: Sage Weil --- fs/ceph/msgpool.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ceph/msgpool.c') diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c index 2f04e0fc4666..ca3b44a89f2d 100644 --- a/fs/ceph/msgpool.c +++ b/fs/ceph/msgpool.c @@ -166,6 +166,10 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { spin_lock(&pool->lock); if (pool->num < pool->min) { + /* reset msg front_len; user may have changed it */ + msg->front.iov_len = pool->front_len; + msg->hdr.front_len = cpu_to_le32(pool->front_len); + kref_set(&msg->kref, 1); /* retake a single ref */ list_add(&msg->list_head, &pool->msgs); pool->num++; -- cgit v1.2.3