From 9d521470a40f16110bd31018034155c60c1a1275 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 17:54:26 +0200 Subject: libceph: a per-osdc crush scratch buffer With the addition of erasure coding support in the future, scratch variable-length array in crush_do_rule_ary() is going to grow to at least 200 bytes on average, on top of another 128 bytes consumed by rawosd/osd arrays in the call chain. Replace it with a buffer inside struct osdmap and a mutex. This shouldn't result in any contention, because all osd requests were already serialized by request_mutex at that point; the only unlocked caller was ceph_ioctl_get_dataloc(). Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/osdmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 49ff69f0746b..8c8b3cefc28b 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -84,6 +84,9 @@ struct ceph_osdmap { /* the CRUSH map specifies the mapping of placement groups to * the list of osds that store+replicate them. */ struct crush_map *crush; + + struct mutex crush_scratch_mutex; + int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; static inline void ceph_oid_set_name(struct ceph_object_id *oid, -- cgit v1.2.3 From 7b25bf5f02c5c80adf96120e031dc3a1756ce54d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:26 +0200 Subject: libceph: encode CEPH_OSD_OP_FLAG_* op flags Encode ceph_osd_op::flags field so that it gets sent over the wire. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/rados.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fd47e872ebcc..e94f5da251d6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -76,6 +76,7 @@ struct ceph_osd_data { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 flags; /* CEPH_OSD_OP_FLAG_* */ u32 payload_len; union { struct ceph_osd_data raw_data_in; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 96292df4041b..8f9bf4570215 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -382,7 +382,7 @@ enum { */ struct ceph_osd_op { __le16 op; /* CEPH_OSD_OP_* */ - __le32 flags; /* CEPH_OSD_FLAG_* */ + __le32 flags; /* CEPH_OSD_OP_FLAG_* */ union { struct { __le64 offset, length; -- cgit v1.2.3 From c647b8a8c6366f849c2a237bfe525cb1d316d5f4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: add support for CEPH_OSD_OP_SETALLOCHINT osd op This is primarily for rbd's benefit and is supposed to combat fragmentation: "... knowing that rbd images have a 4m size, librbd can pass a hint that will let the osd do the xfs allocation size ioctl on new files so that they are allocated in 1m or 4m chunks. We've seen cases where users with rbd workloads have very high levels of fragmentation in xfs and this would mitigate that and probably have a pretty nice performance benefit." SETALLOCHINT is considered advisory, so our backwards compatibility mechanism here is to set FAILOK flag for all SETALLOCHINT ops. 
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 8 ++++++++ include/linux/ceph/rados.h | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index e94f5da251d6..c42d1ada685f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -103,6 +103,10 @@ struct ceph_osd_req_op { u32 timeout; __u8 flag; } watch; + struct { + u64 expected_object_size; + u64 expected_write_size; + } alloc_hint; }; }; @@ -294,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag); +extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, + unsigned int which, + u64 expected_object_size, + u64 expected_write_size); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 8f9bf4570215..2caabef8d369 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -227,6 +227,9 @@ enum { CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + /* hints */ + CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35, + /** multi **/ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, @@ -416,6 +419,10 @@ struct ceph_osd_op { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; + struct { + __le64 expected_object_size; + __le64 expected_write_size; + } __attribute__ ((packed)) alloc_hint; }; __le32 payload_len; } __attribute__ ((packed)); -- cgit 
v1.2.3 From 7cc69d42e6950404587bef9489a5ed6f9f6bab4e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: bump CEPH_OSD_MAX_OP to 3 Our longest osd request now contains 3 ops: copyup+hint+write. Also, CEPH_OSD_MAX_OP value in a BUG_ON in rbd_osd_req_callback() was hard-coded to 2. Fix it, and switch to rbd_assert while at it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c42d1ada685f..94ec69672164 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -43,7 +43,7 @@ struct ceph_osd { }; -#define CEPH_OSD_MAX_OP 2 +#define CEPH_OSD_MAX_OP 3 enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_NONE = 0, -- cgit v1.2.3 From 19913b4eac4a230dccb548931358398f45dabe4c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 6 Mar 2014 16:40:32 +0800 Subject: ceph: add get_name() NFS export callback Use the newly introduced LOOKUPNAME MDS request to connect child inode to its parent directory. 
Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 25bfb0eff772..35f345f7b3a3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -332,6 +332,7 @@ enum { CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, + CEPH_MDS_OP_LOOKUPNAME = 0x00105, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3 From eb13e832f823f6c110ea53e3067bafe22b87de63 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 9 Mar 2014 23:16:40 +0800 Subject: ceph: use fl->fl_file as owner identifier of flock and posix lock flock and posix lock should use fl->fl_file instead of process ID as owner identifier. (posix lock uses fl->fl_owner. fl->fl_owner is usually equal to fl->fl_file, but it also can be a customized value). The process ID of who holds the lock is just for F_GETLK fcntl(2). The fix is to rename the 'pid' fields of struct ceph_mds_request_args and struct ceph_filelock to 'owner', rename 'pid_namespace' fields to 'pid'. Assign fl->fl_file to the 'owner' field of lock messages. We also set the most significant bit of the 'owner' field. MDS can use that bit to distinguish between old and new clients. The MDS counterpart of this patch modifies the flock code to not take the 'pid_namespace' into consideration when checking for conflicting locks. 
Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- include/linux/ceph/ceph_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 35f345f7b3a3..5f6db18d72e8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -421,8 +421,8 @@ union ceph_mds_request_args { struct { __u8 rule; /* currently fcntl or flock */ __u8 type; /* shared, exclusive, remove*/ + __le64 owner; /* owner of the lock */ __le64 pid; /* process id requesting the lock */ - __le64 pid_namespace; __le64 start; /* initial location to lock */ __le64 length; /* num bytes to lock from start */ __u8 wait; /* will caller wait for lock to become available? */ @@ -533,8 +533,8 @@ struct ceph_filelock { __le64 start;/* file offset to start lock at */ __le64 length; /* num bytes to lock; 0 for all following start */ __le64 client; /* which client holds the lock */ + __le64 owner; /* owner the lock */ __le64 pid; /* process id holding the lock on the client */ - __le64 pid_namespace; __u8 type; /* shared lock, exclusive lock, or unlock */ } __attribute__ ((packed)); -- cgit v1.2.3 From 07bd7de47a65767432ceb66d4ab30cdc05ed2b35 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: support chooseleaf_vary_r tunable (tunables3) by default Add TUNABLES3 feature (chooseleaf_vary_r tunable) to a set of features supported by default. 
Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_features.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 138448f766b4..77c097fe9ea9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -43,6 +43,13 @@ #define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) +#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ +/* The process supports new-style OSDMap encoding. Monitors also use + this bit to determine if peers support NAK messages. */ +#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) +#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) +#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) +#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -82,7 +89,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ - CEPH_FEATURE_EXPORT_PEER) + CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3 From a2505d63ee0541d9b4685250b033192e68222e97 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 13 Mar 2014 16:36:13 +0200 Subject: libceph: split osdmap allocation and decode steps Split osdmap allocation and initialization into a separate function, ceph_osdmap_decode(). 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8c8b3cefc28b..46c3e304c3d8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -156,7 +156,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) return 0; } -extern struct ceph_osdmap *osdmap_decode(void **p, void *end); +extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map, struct ceph_messenger *msgr); -- cgit v1.2.3 From 35a935d75d51abe58d3427a8b4ae3745a5a14e1c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: generalize ceph_pg_mapping In preparation for adding support for primary_temp mappings, generalize struct ceph_pg_mapping so it can hold mappings other than pg_temp. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 46c3e304c3d8..4837e58e3203 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -60,8 +60,13 @@ struct ceph_object_id { struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; - int len; - int osds[]; + + union { + struct { + int len; + int osds[]; + } pg_temp; + }; }; struct ceph_osdmap { -- cgit v1.2.3 From 9686f94c8cfc06e8afb7b2233ab8f1f6ac01957f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: primary_temp infrastructure Add primary_temp mappings infrastructure. 
struct ceph_pg_mapping is overloaded, primary_temp mappings are stored in an rb-tree, rooted at ceph_osdmap, in a manner similar to pg_temp mappings. Dump primary_temp mappings to /sys/kernel/debug/ceph/<client>/osdmap, one 'primary_temp <pgid> <osd>' per line, e.g: primary_temp 2.6 4 Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4837e58e3203..db4fb6322aae 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -66,6 +66,9 @@ struct ceph_pg_mapping { int len; int osds[]; } pg_temp; + struct { + int osd; + } primary_temp; }; }; @@ -83,6 +86,8 @@ struct ceph_osdmap { struct ceph_entity_addr *osd_addr; struct rb_root pg_temp; + struct rb_root primary_temp; + struct rb_root pg_pools; u32 pool_max; -- cgit v1.2.3 From 2cfa34f2d67a36e292cbe6e4c1e60d212b7ba4d1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:30 +0200 Subject: libceph: primary_affinity infrastructure Add primary_affinity infrastructure. primary_affinity values are stored in a max_osd-sized array, hanging off ceph_osdmap, similar to an osd_weight array. Introduce {get,set}_primary_affinity() helpers, primarily to return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY when no affinity has been set and to abstract out osd_primary_affinity array allocation and initialization. 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 3 +++ include/linux/ceph/rados.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index db4fb6322aae..6e030cb3c9ca 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -88,6 +88,8 @@ struct ceph_osdmap { struct rb_root pg_temp; struct rb_root primary_temp; + u32 *osd_primary_affinity; + struct rb_root pg_pools; u32 pool_max; @@ -134,6 +136,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) } extern char *ceph_osdmap_state_str(char *str, int len, int state); +extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, int osd) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2caabef8d369..bb6f40c9cb0f 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -133,6 +133,10 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 +/* osd primary-affinity. fixed point value: 0x10000 == baseline */ +#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000 +#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000 + /* * osd map flag bits -- cgit v1.2.3 From ddf3a21a03d0f01c5ba83deaecd2d0c381d5ef42 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:31 +0200 Subject: libceph: enable OSDMAP_ENC feature bit Announce our support for "new" (v7 - split and separately versioned client and osd sections) osdmap encoding. 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 77c097fe9ea9..7a4cab50b2cd 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -90,6 +90,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v1.2.3 From 246138fa6787db6f4016f26604fdc05dc9f95627 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:46 +0200 Subject: libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions Sync up with ceph.git definitions. Bring in ceph_osd_is_down(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 6e030cb3c9ca..0895797b9e28 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -125,9 +125,21 @@ static inline void ceph_oid_copy(struct ceph_object_id *dest, dest->name_len = src->name_len; } +static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) +{ + return osd >= 0 && osd < map->max_osd && + (map->osd_state[osd] & CEPH_OSD_EXISTS); +} + static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) { - return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); + return ceph_osd_exists(map, osd) && + (map->osd_state[osd] & CEPH_OSD_UP); +} + +static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) +{ + return !ceph_osd_is_up(map, osd); } static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) -- cgit v1.2.3 From 
2abebdbca7997422bfab6bf8b6559384a6b95294 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:47 +0200 Subject: libceph: ceph_can_shift_osds(pool) and pool type defines Bring in pg_pool_t::can_shift_osds() counterpart along with pool type defines. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 12 ++++++++++++ include/linux/ceph/rados.h | 5 +++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 0895797b9e28..4e28c1e5d62f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -41,6 +41,18 @@ struct ceph_pg_pool_info { char *name; }; +static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) +{ + switch (pool->type) { + case CEPH_POOL_TYPE_REP: + return true; + case CEPH_POOL_TYPE_EC: + return false; + default: + BUG_ON(1); + } +} + struct ceph_object_locator { s64 pool; }; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index bb6f40c9cb0f..f20e0d8a2155 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -81,8 +81,9 @@ struct ceph_pg_v1 { */ #define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ -#define CEPH_PG_TYPE_REP 1 -#define CEPH_PG_TYPE_RAID4 2 +#define CEPH_POOL_TYPE_REP 1 +#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */ +#define CEPH_POOL_TYPE_EC 3 /* * stable_mod func is used to control number of placement groups. -- cgit v1.2.3 From ac972230e20581b044f5ce66dcaf3c5af8d57444 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: switch ceph_calc_pg_acting() to new helpers Switch ceph_calc_pg_acting() to new helpers: pg_to_raw_osds(), raw_to_up_osds() and apply_temps(). 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4e28c1e5d62f..b0c8f8490663 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *acting); + int *osds); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -- cgit v1.2.3 From 8008ab1080c1768b02d232dcfd9e161cd47cc9f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: return primary from ceph_calc_pg_acting() In preparation for adding support for primary_temp, stop assuming primaryness: add a primary out parameter to ceph_calc_pg_acting() and change call sites accordingly. Primary is now specified separately from the order of osds in the set. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index b0c8f8490663..561ea896c657 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *osds); + int *osds, int *primary); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -- cgit v1.2.3 From 18cb95af2d7c69aa136ab13f02dd55188c120e75 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:50 +0200 Subject: libceph: enable PRIMARY_AFFINITY feature bit Announce our support for osdmaps with non-default primary affinity values. 
Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/ceph') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 7a4cab50b2cd..d12659ce550d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -91,7 +91,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ - CEPH_FEATURE_CRUSH_TUNABLES3) + CEPH_FEATURE_CRUSH_TUNABLES3 | \ + CEPH_FEATURE_OSD_PRIMARY_AFFINITY) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3