From 422d2cb8f9afadba1ecd3614f658b6daaaa480fb Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 26 Feb 2010 15:32:31 -0800 Subject: ceph: reset osd after relevant messages timed out This simplifies the process of timing out messages. We keep lru of current messages that are in flight. If a timeout has passed, we reset the osd connection, so that messages will be retransmitted. This is a failsafe in case we hit some sort of problem sending out message to the OSD. Normally, we'll get notification via an updated osdmap if there are problems. If a request is older than the keepalive timeout, send a keepalive to ensure we detect any breaks in the TCP connection. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/osd_client.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/ceph/osd_client.h') diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index f256eba6fe7a..1b1a3ca43afc 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -36,12 +36,15 @@ struct ceph_osd { void *o_authorizer_buf, *o_authorizer_reply_buf; size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; unsigned long lru_ttl; + int o_marked_for_keepalive; + struct list_head o_keepalive_item; }; /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ struct rb_node r_node; + struct list_head r_req_lru_item; struct list_head r_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; @@ -67,7 +70,7 @@ struct ceph_osd_request { char r_oid[40]; /* object name */ int r_oid_len; - unsigned long r_timeout_stamp; + unsigned long r_sent_stamp; bool r_resend; /* msg send failed, needs retry */ struct ceph_file_layout r_file_layout; @@ -92,6 +95,7 @@ struct ceph_osd_client { u64 timeout_tid; /* tid of timeout triggering rq */ u64 last_tid; /* tid of last request */ struct rb_root requests; /* pending requests */ + struct list_head req_lru; /* pending requests lru */ int num_requests; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; -- cgit v1.2.3