From 34ce4e7c23e3da578e459b05c6fb17edecb19e6b Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 15 Dec 2009 19:34:17 +0200 Subject: exofs: debug print even less * Last debug trimming left in some stupid print, remove them. Fixup some other prints * Shift printing from inode.c to ios.c * Add couple of prints when memory allocation fails. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 5bad01fa1f9f..3cc0dd3f0eb2 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -26,6 +26,9 @@ #include "exofs.h" +#define EXOFS_DBGMSG2(M...) do {} while (0) +/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ + void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) { osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); @@ -73,6 +76,8 @@ int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) */ ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); if (unlikely(!ios)) { + EXOFS_DBGMSG("Faild kzalloc bytes=%d\n", + exofs_io_state_size(sbi->s_numdevs)); *pios = NULL; return -ENOMEM; } @@ -276,6 +281,9 @@ int exofs_sbi_write(struct exofs_io_state *ios) bio = bio_kmalloc(GFP_KERNEL, ios->bio->bi_max_vecs); if (unlikely(!bio)) { + EXOFS_DBGMSG( + "Faild to allocate BIO size=%u\n", + ios->bio->bi_max_vecs); ret = -ENOMEM; goto out; } @@ -290,14 +298,21 @@ int exofs_sbi_write(struct exofs_io_state *ios) osd_req_write(or, &ios->obj, ios->offset, bio, ios->length); -/* EXOFS_DBGMSG("write sync=%d\n", sync);*/ + EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", + _LLU(ios->obj.id), _LLU(ios->offset), + _LLU(ios->length), i); } else if (ios->kern_buff) { osd_req_write_kern(or, &ios->obj, ios->offset, ios->kern_buff, ios->length); -/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ + EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " + "length=0x%llx 
dev=%d\n", + _LLU(ios->obj.id), _LLU(ios->offset), + _LLU(ios->length), i); } else { osd_req_set_attributes(or, &ios->obj); -/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ + EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", + _LLU(ios->obj.id), ios->out_attr_len, i); } if (ios->out_attr) @@ -335,14 +350,25 @@ int exofs_sbi_read(struct exofs_io_state *ios) if (ios->bio) { osd_req_read(or, &ios->obj, ios->offset, ios->bio, ios->length); -/* EXOFS_DBGMSG("read sync=%d\n", sync);*/ + EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" + " dev=%d\n", _LLU(ios->obj.id), + _LLU(ios->offset), + _LLU(ios->length), + first_dev); } else if (ios->kern_buff) { osd_req_read_kern(or, &ios->obj, ios->offset, ios->kern_buff, ios->length); -/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ + EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d\n", + _LLU(ios->obj.id), + _LLU(ios->offset), + _LLU(ios->length), + first_dev); } else { osd_req_get_attributes(or, &ios->obj); -/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ + EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", + _LLU(ios->obj.id), ios->in_attr_len, + first_dev); } if (ios->out_attr) -- cgit v1.2.3 From 22ddc556380cf5645c52292b6d980766646eb864 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 19 Jan 2010 19:24:45 +0200 Subject: exofs: Recover in the case of read-passed-end-of-file In check_io, implement the case of reading passed end of file, by clearing the pages and recover with no error. In a raid arrangement this can become a legitimate situation in case of holes in the file. 
Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 3cc0dd3f0eb2..439c5d097b27 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -173,6 +173,21 @@ static int exofs_io_execute(struct exofs_io_state *ios) return ret; } +static void _clear_bio(struct bio *bio) +{ + struct bio_vec *bv; + unsigned i; + + __bio_for_each_segment(bv, bio, i, 0) { + unsigned this_count = bv->bv_len; + + if (likely(PAGE_SIZE == this_count)) + clear_highpage(bv->bv_page); + else + zero_user(bv->bv_page, bv->bv_offset, this_count); + } +} + int exofs_check_io(struct exofs_io_state *ios, u64 *resid) { enum osd_err_priority acumulated_osd_err = 0; @@ -181,16 +196,25 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) for (i = 0; i < ios->numdevs; i++) { struct osd_sense_info osi; - int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); + struct osd_request *or = ios->per_dev[i].or; + int ret; + + if (unlikely(!or)) + continue; + ret = osd_req_decode_sense(or, &osi); if (likely(!ret)) continue; - if (unlikely(ret == -EFAULT)) { - EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); - /*FIXME: All the pages in this device range should: - * clear_highpage(page); - */ + if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { + /* start read offset passed endof file */ + _clear_bio(ios->per_dev[i].bio); + EXOFS_DBGMSG("start read offset passed end of file " + "offset=0x%llx, length=0x%llx\n", + _LLU(ios->offset), + _LLU(ios->length)); + + continue; /* we recovered */ } if (osi.osd_err_pri >= acumulated_osd_err) { -- cgit v1.2.3 From 45d3abcb1a7388b2b97582e13bf9dd21784dcaa5 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 28 Jan 2010 11:46:16 +0200 Subject: exofs: Move layout related members to a layout structure * Abstract away those members in exofs_sb_info that are related/needed by a layout into a new exofs_layout 
structure. Embed it in exofs_sb_info. * At exofs_io_state receive/keep a pointer to an exofs_layout. No need for an exofs_sb_info pointer, all we need is at exofs_layout. * Change any usage of above exofs_sb_info members to their new name. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 439c5d097b27..83e54a77b992 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -67,23 +67,24 @@ out: return ret; } -int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) +int exofs_get_io_state(struct exofs_layout *layout, + struct exofs_io_state **pios) { struct exofs_io_state *ios; /*TODO: Maybe use kmem_cach per sbi of size - * exofs_io_state_size(sbi->s_numdevs) + * exofs_io_state_size(layout->s_numdevs) */ - ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL); + ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); if (unlikely(!ios)) { EXOFS_DBGMSG("Faild kzalloc bytes=%d\n", - exofs_io_state_size(sbi->s_numdevs)); + exofs_io_state_size(layout->s_numdevs)); *pios = NULL; return -ENOMEM; } - ios->sbi = sbi; - ios->obj.partition = sbi->s_pid; + ios->layout = layout; + ios->obj.partition = layout->s_pid; *pios = ios; return 0; } @@ -238,10 +239,10 @@ int exofs_sbi_create(struct exofs_io_state *ios) { int i, ret; - for (i = 0; i < ios->sbi->s_numdevs; i++) { + for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); + or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -262,10 +263,10 @@ int exofs_sbi_remove(struct exofs_io_state *ios) { int i, ret; - for (i = 0; i < ios->sbi->s_numdevs; i++) { + for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = 
osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); + or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -286,10 +287,10 @@ int exofs_sbi_write(struct exofs_io_state *ios) { int i, ret; - for (i = 0; i < ios->sbi->s_numdevs; i++) { + for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL); + or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -361,8 +362,9 @@ int exofs_sbi_read(struct exofs_io_state *ios) struct osd_request *or; unsigned first_dev = (unsigned)ios->obj.id; - first_dev %= ios->sbi->s_numdevs; - or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL); + first_dev %= ios->layout->s_numdevs; + or = osd_start_request(ios->layout->s_ods[first_dev], + GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -438,7 +440,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) __be64 newsize; int i, ret; - if (exofs_get_io_state(sbi, &ios)) + if (exofs_get_io_state(&sbi->layout, &ios)) return -ENOMEM; ios->obj.id = exofs_oi_objno(oi); @@ -448,10 +450,10 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) attr = g_attr_logical_length; attr.val_ptr = &newsize; - for (i = 0; i < sbi->s_numdevs; i++) { + for (i = 0; i < sbi->layout.s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(sbi->s_ods[i], GFP_KERNEL); + or = osd_start_request(sbi->layout.s_ods[i], GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; -- cgit v1.2.3 From 46f4d973f6874c06b7a41a3bf8f4c1717d90f97a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 1 Feb 2010 11:37:30 +0200 Subject: exofs: unindent exofs_sbi_read The original idea was that a mirror read can be sub-divided to multiple 
devices. But this has very little gain and only at very large IOes so it's not going to be implemented soon. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 87 +++++++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 49 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 83e54a77b992..4f679317ca54 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -356,59 +356,48 @@ out: int exofs_sbi_read(struct exofs_io_state *ios) { - int i, ret; - - for (i = 0; i < 1; i++) { - struct osd_request *or; - unsigned first_dev = (unsigned)ios->obj.id; - - first_dev %= ios->layout->s_numdevs; - or = osd_start_request(ios->layout->s_ods[first_dev], - GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - ret = -ENOMEM; - goto out; - } - ios->per_dev[i].or = or; - ios->numdevs++; + struct osd_request *or; + struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; + unsigned first_dev = (unsigned)ios->obj.id; - if (ios->bio) { - osd_req_read(or, &ios->obj, ios->offset, ios->bio, - ios->length); - EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" - " dev=%d\n", _LLU(ios->obj.id), - _LLU(ios->offset), - _LLU(ios->length), - first_dev); - } else if (ios->kern_buff) { - osd_req_read_kern(or, &ios->obj, ios->offset, - ios->kern_buff, ios->length); - EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " - "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), - _LLU(ios->offset), - _LLU(ios->length), - first_dev); - } else { - osd_req_get_attributes(or, &ios->obj); - EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->in_attr_len, - first_dev); - } + first_dev %= ios->layout->s_numdevs; + or = osd_start_request(ios->layout->s_ods[first_dev], GFP_KERNEL); + if (unlikely(!or)) { + EXOFS_ERR("%s: osd_start_request failed\n", __func__); + return -ENOMEM; + } + per_dev->or = or; + ios->numdevs++; + + if (ios->bio) { + osd_req_read(or, &ios->obj, ios->offset, 
ios->bio, ios->length); + EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" + " dev=%d\n", _LLU(ios->obj.id), + _LLU(ios->offset), _LLU(ios->length), + first_dev); + } else if (ios->kern_buff) { + int ret = osd_req_read_kern(or, &ios->obj, ios->offset, + ios->kern_buff, ios->length); + + EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%d ret=>%d\n", + _LLU(ios->obj.id), _LLU(ios->offset), + _LLU(ios->length), first_dev, ret); + if (unlikely(ret)) + return ret; + } else { + osd_req_get_attributes(or, &ios->obj); + EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", + _LLU(ios->obj.id), ios->in_attr_len, first_dev); + } - if (ios->out_attr) - osd_req_add_set_attr_list(or, ios->out_attr, - ios->out_attr_len); + if (ios->out_attr) + osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); - if (ios->in_attr) - osd_req_add_get_attr_list(or, ios->in_attr, - ios->in_attr_len); - } - ret = exofs_io_execute(ios); + if (ios->in_attr) + osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); -out: - return ret; + return exofs_io_execute(ios); } int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) -- cgit v1.2.3 From d9c740d2253e75db8cef8f87a3125c450f3ebd82 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 28 Jan 2010 11:58:08 +0200 Subject: exofs: Define on-disk per-inode optional layout attribute * Layouts describe the way a file is spread on multiple devices. The layout information is stored in the objects attribute introduced in this patch. * There can be multiple generating function for the layout. Currently defined: - No attribute present - use below moving-window on global device table, all devices. (This is the only one currently used in exofs) - an obj_id generated moving window - the obj_id is a randomizing factor in the otherwise global map layout. - An explicit layout stored, including a data_map and a device index list. - More might be defined in future ... 
* There are two attributes defined of the same structure: A-data-files-layout - This layout is used by data-files. If present at a directory, all files of that directory will be created with this layout. A-meta-data-layout - This layout is used by a directory and other meta-data information. Also inherited at creation of subdirectories. * At creation time inodes are created with the layout specified above. A usermode utility may change the creation layout on a given directory or file, which in the case of directories will also apply to newly created files/subdirectories, children of that directory. In the simple unaltered case of a newly created exofs, no layout attributes are present, and all layouts adhere to the layout specified at the device-table. * In case of a future file system loaded in an old exofs-driver: at iget(), the generating_function is inspected and if not supported will return an IO error to the application and the inode will not be loaded, so as not to damage any data. Note: After this patch we do not yet support any type of layout; only the RAID0 patch that enables striping at the super-block level will add support for RAID0 layouts above. This way we are past and future compatible and fully bisectable. * Access to the device table is done by an accessor since it will change according to above information. 
Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4f679317ca54..2b81f99fd62c 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -107,6 +107,19 @@ void exofs_put_io_state(struct exofs_io_state *ios) } } +unsigned exofs_layout_od_id(struct exofs_layout *layout, + osd_id obj_no, unsigned layout_index) +{ + return layout_index; +} + +static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, + unsigned layout_index) +{ + return ios->layout->s_ods[ + exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; +} + static void _sync_done(struct exofs_io_state *ios, void *p) { struct completion *waiting = p; @@ -242,7 +255,7 @@ int exofs_sbi_create(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -266,7 +279,7 @@ int exofs_sbi_remove(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -290,7 +303,7 @@ int exofs_sbi_write(struct exofs_io_state *ios) for (i = 0; i < ios->layout->s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(ios->layout->s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; @@ -361,7 +374,7 @@ int exofs_sbi_read(struct exofs_io_state *ios) unsigned first_dev = (unsigned)ios->obj.id; first_dev %= ios->layout->s_numdevs; - or = 
osd_start_request(ios->layout->s_ods[first_dev], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; @@ -442,7 +455,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) for (i = 0; i < sbi->layout.s_numdevs; i++) { struct osd_request *or; - or = osd_start_request(sbi->layout.s_ods[i], GFP_KERNEL); + or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; -- cgit v1.2.3 From 5d952b8391692553c31e620a92d6e09262a9a307 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Mon, 1 Feb 2010 13:35:51 +0200 Subject: exofs: RAID0 support We now support striping over mirror devices. Including variable sized stripe_unit. Some limits: * stripe_unit must be a multiple of PAGE_SIZE * stripe_unit * stripe_count is maximum upto 32-bit (4Gb) Tested RAID0 over mirrors, RAID0 only, mirrors only. All check. Design notes: * I'm not using a vectored raid-engine mechanism yet. Following the pnfs-objects-layout data-map structure, "Mirror" is just a private case of "group_width" == 1, and RAID0 is a private case of "Mirrors" == 1. The performance lose of the general case over the particular special case optimization is totally negligible, also considering the extra code size. * In general I added a prepare_stripes() stage that divides the to-be-io pages to the participating devices, the previous exofs_ios_write/read, now becomes _write/read_mirrors and a new write/read upper layer loops on all devices calling _write/read_mirrors. Effectively the prepare_stripes stage is the all secret. Also truncate need fixing to accommodate for striping. * In a RAID0 arrangement, in a regular usage scenario, if all inode layouts will start at the same device, the small files fill up the first device and the later devices stay empty, the farther the device the emptier it is. 
To fix that, each inode will start at a different stripe_unit, according to it's obj_id modulus number-of-stripe-units. And will then span all stripe-units in the same incrementing order wrapping back to the beginning of the device table. We call it a stripe-units moving window. Special consideration was taken to keep all devices in a mirror arrangement identical. So a broken osd-device could just be cloned from one of the mirrors and no FS scrubbing is needed. (We do that by rotating stripe-unit at a time and not a single device at a time.) TODO: We no longer verify object_length == inode->i_size in exofs_iget. (since i_size is stripped on multiple objects now). I should introduce a multiple-device attribute reading, and use it in exofs_iget. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 327 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 273 insertions(+), 54 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 2b81f99fd62c..6e446b2670b9 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -23,6 +23,7 @@ */ #include +#include #include "exofs.h" @@ -110,7 +111,17 @@ void exofs_put_io_state(struct exofs_io_state *ios) unsigned exofs_layout_od_id(struct exofs_layout *layout, osd_id obj_no, unsigned layout_index) { - return layout_index; +/* switch (layout->lay_func) { + case LAYOUT_MOVING_WINDOW: + {*/ + unsigned dev_mod = obj_no; + + return (layout_index + dev_mod * layout->mirrors_p1) % + layout->s_numdevs; +/* } + case LAYOUT_FUNC_IMPLICT: + return layout->devs[layout_index]; + }*/ } static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, @@ -225,8 +236,8 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) _clear_bio(ios->per_dev[i].bio); EXOFS_DBGMSG("start read offset passed end of file " "offset=0x%llx, length=0x%llx\n", - _LLU(ios->offset), - _LLU(ios->length)); + _LLU(ios->per_dev[i].offset), + _LLU(ios->per_dev[i].length)); continue; /* we recovered */ } @@ -248,6 
+259,127 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) return acumulated_lin_err; } +/* REMOVEME: After review + Some quoteing from the standard + + L = logical offset into the file + W = number of data components in a stripe + S = W * stripe_unit (S is Stripe length) + N = L / S (N is the stripe Number) + C = (L-(N*S)) / stripe_unit (C is the component) + O = (N*stripe_unit)+(L%stripe_unit) (O is the object's offset) +*/ + +static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset, + u64 *obj_offset, unsigned *dev, unsigned *unit_off) +{ + unsigned stripe_unit = ios->layout->stripe_unit; + unsigned stripe_length = stripe_unit * ios->layout->group_width; + u64 stripe_no = file_offset; + unsigned stripe_mod = do_div(stripe_no, stripe_length); + + *unit_off = stripe_mod % stripe_unit; + *obj_offset = stripe_no * stripe_unit + *unit_off; + *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1; +} + +static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, + struct exofs_per_dev_state *per_dev, int cur_len) +{ + unsigned bv = *cur_bvec; + struct request_queue *q = + osd_request_queue(exofs_ios_od(ios, per_dev->dev)); + + per_dev->length += cur_len; + + if (per_dev->bio == NULL) { + unsigned pages_in_stripe = ios->layout->group_width * + (ios->layout->stripe_unit / PAGE_SIZE); + unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) / + ios->layout->group_width; + + per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); + if (unlikely(!per_dev->bio)) { + EXOFS_DBGMSG("Faild to allocate BIO size=%u\n", + bio_size); + return -ENOMEM; + } + } + + while (cur_len > 0) { + int added_len; + struct bio_vec *bvec = &ios->bio->bi_io_vec[bv]; + + BUG_ON(ios->bio->bi_vcnt <= bv); + cur_len -= bvec->bv_len; + + added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page, + bvec->bv_len, bvec->bv_offset); + if (unlikely(bvec->bv_len != added_len)) + return -ENOMEM; + ++bv; + } + BUG_ON(cur_len); + + *cur_bvec = bv; + return 0; +} 
+ +static int _prepare_for_striping(struct exofs_io_state *ios) +{ + u64 length = ios->length; + u64 offset = ios->offset; + unsigned stripe_unit = ios->layout->stripe_unit; + unsigned comp = 0; + unsigned stripes = 0; + unsigned cur_bvec = 0; + int ret; + + if (!ios->bio) { + if (ios->kern_buff) { + struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; + unsigned unit_off; + + _offset_dev_unit_off(ios, offset, &per_dev->offset, + &per_dev->dev, &unit_off); + /* no cross device without page array */ + BUG_ON((ios->layout->group_width > 1) && + (unit_off + length > stripe_unit)); + } + ios->numdevs = ios->layout->mirrors_p1; + return 0; + } + + while (length) { + struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; + unsigned cur_len; + + if (!per_dev->length) { + unsigned unit_off; + + _offset_dev_unit_off(ios, offset, &per_dev->offset, + &per_dev->dev, &unit_off); + stripes++; + cur_len = min_t(u64, stripe_unit - unit_off, length); + offset += cur_len; + } else { + cur_len = min_t(u64, stripe_unit, length); + } + + ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len); + if (unlikely(ret)) + goto out; + + comp += ios->layout->mirrors_p1; + comp %= ios->layout->s_numdevs; + + length -= cur_len; + } +out: + ios->numdevs = stripes * ios->layout->mirrors_p1; + return ret; +} + int exofs_sbi_create(struct exofs_io_state *ios) { int i, ret; @@ -296,61 +428,71 @@ out: return ret; } -int exofs_sbi_write(struct exofs_io_state *ios) +static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) { - int i, ret; + struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; + unsigned dev = ios->per_dev[cur_comp].dev; + unsigned last_comp = cur_comp + ios->layout->mirrors_p1; + int ret = 0; - for (i = 0; i < ios->layout->s_numdevs; i++) { + for (; cur_comp < last_comp; ++cur_comp, ++dev) { + struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); + or = 
osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; goto out; } - ios->per_dev[i].or = or; - ios->numdevs++; + per_dev->or = or; + per_dev->offset = master_dev->offset; if (ios->bio) { struct bio *bio; - if (i != 0) { + if (per_dev != master_dev) { bio = bio_kmalloc(GFP_KERNEL, - ios->bio->bi_max_vecs); + master_dev->bio->bi_max_vecs); if (unlikely(!bio)) { EXOFS_DBGMSG( "Faild to allocate BIO size=%u\n", - ios->bio->bi_max_vecs); + master_dev->bio->bi_max_vecs); ret = -ENOMEM; goto out; } - __bio_clone(bio, ios->bio); + __bio_clone(bio, master_dev->bio); bio->bi_bdev = NULL; bio->bi_next = NULL; - ios->per_dev[i].bio = bio; + per_dev->length = master_dev->length; + per_dev->bio = bio; + per_dev->dev = dev; } else { - bio = ios->bio; + bio = master_dev->bio; + /* FIXME: bio_set_dir() */ + bio->bi_rw |= (1 << BIO_RW); } - osd_req_write(or, &ios->obj, ios->offset, bio, - ios->length); + osd_req_write(or, &ios->obj, per_dev->offset, bio, + per_dev->length); EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(ios->offset), - _LLU(ios->length), i); + _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(per_dev->length), dev); } else if (ios->kern_buff) { - osd_req_write_kern(or, &ios->obj, ios->offset, + ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, ios->kern_buff, ios->length); + if (unlikely(ret)) + goto out; EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(ios->offset), - _LLU(ios->length), i); + _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(ios->length), dev); } else { osd_req_set_attributes(or, &ios->obj); EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->out_attr_len, i); + _LLU(ios->obj.id), ios->out_attr_len, dev); } if (ios->out_attr) @@ -361,40 +503,57 @@ int exofs_sbi_write(struct exofs_io_state *ios) osd_req_add_get_attr_list(or, 
ios->in_attr, ios->in_attr_len); } - ret = exofs_io_execute(ios); out: return ret; } -int exofs_sbi_read(struct exofs_io_state *ios) +int exofs_sbi_write(struct exofs_io_state *ios) +{ + int i; + int ret; + + ret = _prepare_for_striping(ios); + if (unlikely(ret)) + return ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + ret = _sbi_write_mirror(ios, i); + if (unlikely(ret)) + return ret; + } + + ret = exofs_io_execute(ios); + return ret; +} + +static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) { struct osd_request *or; - struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; + struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; unsigned first_dev = (unsigned)ios->obj.id; - first_dev %= ios->layout->s_numdevs; + first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { EXOFS_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; } per_dev->or = or; - ios->numdevs++; if (ios->bio) { - osd_req_read(or, &ios->obj, ios->offset, ios->bio, ios->length); + osd_req_read(or, &ios->obj, per_dev->offset, + per_dev->bio, per_dev->length); EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" " dev=%d\n", _LLU(ios->obj.id), - _LLU(ios->offset), _LLU(ios->length), + _LLU(per_dev->offset), _LLU(per_dev->length), first_dev); } else if (ios->kern_buff) { - int ret = osd_req_read_kern(or, &ios->obj, ios->offset, + int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, ios->kern_buff, ios->length); - EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d ret=>%d\n", - _LLU(ios->obj.id), _LLU(ios->offset), + _LLU(ios->obj.id), _LLU(per_dev->offset), _LLU(ios->length), first_dev, ret); if (unlikely(ret)) return ret; @@ -403,14 +562,32 @@ int exofs_sbi_read(struct exofs_io_state *ios) EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", _LLU(ios->obj.id), ios->in_attr_len, first_dev); } - if 
(ios->out_attr) osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); if (ios->in_attr) osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); - return exofs_io_execute(ios); + return 0; +} + +int exofs_sbi_read(struct exofs_io_state *ios) +{ + int i; + int ret; + + ret = _prepare_for_striping(ios); + if (unlikely(ret)) + return ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + ret = _sbi_read_mirror(ios, i); + if (unlikely(ret)) + return ret; + } + + ret = exofs_io_execute(ios); + return ret; } int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) @@ -434,42 +611,84 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) return -EIO; } +static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, + struct osd_attr *attr) +{ + int last_comp = cur_comp + ios->layout->mirrors_p1; + + for (; cur_comp < last_comp; ++cur_comp) { + struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct osd_request *or; + + or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); + if (unlikely(!or)) { + EXOFS_ERR("%s: osd_start_request failed\n", __func__); + return -ENOMEM; + } + per_dev->or = or; + + osd_req_set_attributes(or, &ios->obj); + osd_req_add_set_attr_list(or, attr, 1); + } + + return 0; +} + int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) { struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; struct exofs_io_state *ios; - struct osd_attr attr; - __be64 newsize; + struct exofs_trunc_attr { + struct osd_attr attr; + __be64 newsize; + } *size_attrs; + u64 this_obj_size; + unsigned dev; + unsigned unit_off; int i, ret; - if (exofs_get_io_state(&sbi->layout, &ios)) - return -ENOMEM; + ret = exofs_get_io_state(&sbi->layout, &ios); + if (unlikely(ret)) + return ret; + + size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), + GFP_KERNEL); + if (unlikely(!size_attrs)) { + ret = -ENOMEM; + goto out; + } ios->obj.id = 
exofs_oi_objno(oi); ios->cred = oi->i_cred; - newsize = cpu_to_be64(size); - attr = g_attr_logical_length; - attr.val_ptr = &newsize; + ios->numdevs = ios->layout->s_numdevs; + _offset_dev_unit_off(ios, size, &this_obj_size, &dev, &unit_off); - for (i = 0; i < sbi->layout.s_numdevs; i++) { - struct osd_request *or; + for (i = 0; i < ios->layout->group_width; ++i) { + struct exofs_trunc_attr *size_attr = &size_attrs[i]; + u64 obj_size; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); - if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); - ret = -ENOMEM; - goto out; - } - ios->per_dev[i].or = or; - ios->numdevs++; + if (i < dev) + obj_size = this_obj_size + + ios->layout->stripe_unit - unit_off; + else if (i == dev) + obj_size = this_obj_size; + else /* i > dev */ + obj_size = this_obj_size - unit_off; - osd_req_set_attributes(or, &ios->obj); - osd_req_add_set_attr_list(or, &attr, 1); + size_attr->newsize = cpu_to_be64(obj_size); + size_attr->attr = g_attr_logical_length; + size_attr->attr.val_ptr = &size_attr->newsize; + + ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, + &size_attr->attr); + if (unlikely(ret)) + goto out; } ret = exofs_io_execute(ios); out: + kfree(size_attrs); exofs_put_io_state(ios); return ret; } -- cgit v1.2.3 From 86093aaff5be5b214613eb60553e236bdb389c84 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 28 Jan 2010 18:24:06 +0200 Subject: exofs: convert io_state to use pages array instead of bio at input * inode.c operations are full-pages based, and not actually true scatter-gather * Lets us use more pages at once upto 512 (from 249) in 64 bit * Brings us much much closer to be able to use exofs's io_state engine from objlayout driver. (Once I decide where to put the common code) After RAID0 patch the outer (input) bio was never used as a bio, but was simply a page carrier into the raid engine. Even in the simple mirror/single-dev arrangement pages info was copied into a second bio. 
It is now easier to just pass a pages array into the io_state and prepare bio(s) once. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 6e446b2670b9..263052c77f41 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -283,10 +283,11 @@ static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset, *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1; } -static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, - struct exofs_per_dev_state *per_dev, int cur_len) +static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, + unsigned pgbase, struct exofs_per_dev_state *per_dev, + int cur_len) { - unsigned bv = *cur_bvec; + unsigned pg = *cur_pg; struct request_queue *q = osd_request_queue(exofs_ios_od(ios, per_dev->dev)); @@ -295,7 +296,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, if (per_dev->bio == NULL) { unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); - unsigned bio_size = (ios->bio->bi_vcnt + pages_in_stripe) / + unsigned bio_size = (ios->nr_pages + pages_in_stripe) / ios->layout->group_width; per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); @@ -307,21 +308,22 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_bvec, } while (cur_len > 0) { - int added_len; - struct bio_vec *bvec = &ios->bio->bi_io_vec[bv]; + unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); + unsigned added_len; - BUG_ON(ios->bio->bi_vcnt <= bv); - cur_len -= bvec->bv_len; + BUG_ON(ios->nr_pages <= pg); + cur_len -= pglen; - added_len = bio_add_pc_page(q, per_dev->bio, bvec->bv_page, - bvec->bv_len, bvec->bv_offset); - if (unlikely(bvec->bv_len != added_len)) + added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], + pglen, pgbase); + if 
(unlikely(pglen != added_len)) return -ENOMEM; - ++bv; + pgbase = 0; + ++pg; } BUG_ON(cur_len); - *cur_bvec = bv; + *cur_pg = pg; return 0; } @@ -332,10 +334,10 @@ static int _prepare_for_striping(struct exofs_io_state *ios) unsigned stripe_unit = ios->layout->stripe_unit; unsigned comp = 0; unsigned stripes = 0; - unsigned cur_bvec = 0; - int ret; + unsigned cur_pg = 0; + int ret = 0; - if (!ios->bio) { + if (!ios->pages) { if (ios->kern_buff) { struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; unsigned unit_off; @@ -352,7 +354,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios) while (length) { struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; - unsigned cur_len; + unsigned cur_len, page_off; if (!per_dev->length) { unsigned unit_off; @@ -362,11 +364,15 @@ static int _prepare_for_striping(struct exofs_io_state *ios) stripes++; cur_len = min_t(u64, stripe_unit - unit_off, length); offset += cur_len; + page_off = unit_off & ~PAGE_MASK; + BUG_ON(page_off != ios->pgbase); } else { cur_len = min_t(u64, stripe_unit, length); + page_off = 0; } - ret = _add_stripe_unit(ios, &cur_bvec, per_dev, cur_len); + ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, + cur_len); if (unlikely(ret)) goto out; @@ -448,7 +454,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) per_dev->or = or; per_dev->offset = master_dev->offset; - if (ios->bio) { + if (ios->pages) { struct bio *bio; if (per_dev != master_dev) { @@ -541,7 +547,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) } per_dev->or = or; - if (ios->bio) { + if (ios->pages) { osd_req_read(or, &ios->obj, per_dev->offset, per_dev->bio, per_dev->length); EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" -- cgit v1.2.3 From b367e78bd1c7af4c018ce98b1f6d3e001aba895a Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 7 Feb 2010 19:18:58 +0200 Subject: exofs: Prepare for groups * Rename _offset_dev_unit_off() to _calc_stripe_info() and 
receive a struct for the output params * In _prepare_for_striping we only need to call _calc_stripe_info() once. The other components are easy to calculate from that. This code was inspired by what's done in truncate. * Some code shifts that make sense now but will make more sense when group support is added. Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 159 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 60 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 263052c77f41..d28febdf54ab 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -259,28 +259,46 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) return acumulated_lin_err; } -/* REMOVEME: After review - Some quoteing from the standard - - L = logical offset into the file - W = number of data components in a stripe - S = W * stripe_unit (S is Stripe length) - N = L / S (N is the stripe Number) - C = (L-(N*S)) / stripe_unit (C is the component) - O = (N*stripe_unit)+(L%stripe_unit) (O is the object's offset) -*/ - -static void _offset_dev_unit_off(struct exofs_io_state *ios, u64 file_offset, - u64 *obj_offset, unsigned *dev, unsigned *unit_off) +/* + * L - logical offset into the file + * + * U - The number of bytes in a full stripe + * + * U = stripe_unit * group_width + * + * N - The stripe number + * + * N = L / U + * + * C - The component index coresponding to L + * + * C = (L - (N*U)) / stripe_unit + * + * O - The component offset coresponding to L + * + * (N*stripe_unit)+(L%stripe_unit) + */ + +struct _striping_info { + u64 obj_offset; + unsigned dev; + unsigned unit_off; +}; + +static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, + struct _striping_info *si) { - unsigned stripe_unit = ios->layout->stripe_unit; - unsigned stripe_length = stripe_unit * ios->layout->group_width; - u64 stripe_no = file_offset; - unsigned stripe_mod = do_div(stripe_no, stripe_length); + u32 stripe_unit = 
ios->layout->stripe_unit; + u32 group_width = ios->layout->group_width; + u32 U = stripe_unit * group_width; + + u32 LmodU; + u64 N = div_u64_rem(file_offset, U, &LmodU); - *unit_off = stripe_mod % stripe_unit; - *obj_offset = stripe_no * stripe_unit + *unit_off; - *dev = stripe_mod / stripe_unit * ios->layout->mirrors_p1; + si->unit_off = LmodU % stripe_unit; + si->obj_offset = N * stripe_unit + si->unit_off; + si->dev = LmodU / stripe_unit; + si->dev *= ios->layout->mirrors_p1; } static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, @@ -327,65 +345,88 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, return 0; } -static int _prepare_for_striping(struct exofs_io_state *ios) +static int _prepare_pages(struct exofs_io_state *ios, + struct _striping_info *si) { u64 length = ios->length; - u64 offset = ios->offset; unsigned stripe_unit = ios->layout->stripe_unit; + unsigned mirrors_p1 = ios->layout->mirrors_p1; + unsigned dev = si->dev; unsigned comp = 0; unsigned stripes = 0; unsigned cur_pg = 0; int ret = 0; - if (!ios->pages) { - if (ios->kern_buff) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; - unsigned unit_off; - - _offset_dev_unit_off(ios, offset, &per_dev->offset, - &per_dev->dev, &unit_off); - /* no cross device without page array */ - BUG_ON((ios->layout->group_width > 1) && - (unit_off + length > stripe_unit)); - } - ios->numdevs = ios->layout->mirrors_p1; - return 0; - } - while (length) { struct exofs_per_dev_state *per_dev = &ios->per_dev[comp]; - unsigned cur_len, page_off; + unsigned cur_len, page_off = 0; if (!per_dev->length) { - unsigned unit_off; + per_dev->dev = dev; + if (dev < si->dev) { + per_dev->offset = si->obj_offset + stripe_unit - + si->unit_off; + cur_len = stripe_unit; + } else if (dev == si->dev) { + per_dev->offset = si->obj_offset; + cur_len = stripe_unit - si->unit_off; + page_off = si->unit_off & ~PAGE_MASK; + BUG_ON(page_off && (page_off != ios->pgbase)); + } else { /* 
dev > si->dev */ + per_dev->offset = si->obj_offset - si->unit_off; + cur_len = stripe_unit; + } - _offset_dev_unit_off(ios, offset, &per_dev->offset, - &per_dev->dev, &unit_off); stripes++; - cur_len = min_t(u64, stripe_unit - unit_off, length); - offset += cur_len; - page_off = unit_off & ~PAGE_MASK; - BUG_ON(page_off != ios->pgbase); + + dev += mirrors_p1; + dev %= ios->layout->s_numdevs; } else { - cur_len = min_t(u64, stripe_unit, length); - page_off = 0; + cur_len = stripe_unit; } + if (cur_len >= length) + cur_len = length; ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, cur_len); if (unlikely(ret)) goto out; - comp += ios->layout->mirrors_p1; + comp += mirrors_p1; comp %= ios->layout->s_numdevs; length -= cur_len; } out: - ios->numdevs = stripes * ios->layout->mirrors_p1; + ios->numdevs = stripes * mirrors_p1; return ret; } +static int _prepare_for_striping(struct exofs_io_state *ios) +{ + struct _striping_info si; + + _calc_stripe_info(ios, ios->offset, &si); + + if (!ios->pages) { + if (ios->kern_buff) { + struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; + + per_dev->offset = si.obj_offset; + per_dev->dev = si.dev; + + /* no cross device without page array */ + BUG_ON((ios->layout->group_width > 1) && + (si.unit_off + ios->length > + ios->layout->stripe_unit)); + } + ios->numdevs = ios->layout->mirrors_p1; + return 0; + } + + return _prepare_pages(ios, &si); +} + int exofs_sbi_create(struct exofs_io_state *ios) { int i, ret; @@ -648,9 +689,7 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) struct osd_attr attr; __be64 newsize; } *size_attrs; - u64 this_obj_size; - unsigned dev; - unsigned unit_off; + struct _striping_info si; int i, ret; ret = exofs_get_io_state(&sbi->layout, &ios); @@ -668,19 +707,19 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) ios->cred = oi->i_cred; ios->numdevs = ios->layout->s_numdevs; - _offset_dev_unit_off(ios, size, &this_obj_size, &dev, &unit_off); + _calc_stripe_info(ios, size, &si); for 
(i = 0; i < ios->layout->group_width; ++i) { struct exofs_trunc_attr *size_attr = &size_attrs[i]; u64 obj_size; - if (i < dev) - obj_size = this_obj_size + - ios->layout->stripe_unit - unit_off; - else if (i == dev) - obj_size = this_obj_size; - else /* i > dev */ - obj_size = this_obj_size - unit_off; + if (i < si.dev) + obj_size = si.obj_offset + + ios->layout->stripe_unit - si.unit_off; + else if (i == si.dev) + obj_size = si.obj_offset; + else /* i > si.dev */ + obj_size = si.obj_offset - si.unit_off; size_attr->newsize = cpu_to_be64(obj_size); size_attr->attr = g_attr_logical_length; -- cgit v1.2.3 From 50a76fd3c352ed2740eba01512efcfceee0703be Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 11 Feb 2010 13:01:39 +0200 Subject: exofs: groups support * _calc_stripe_info() changes to accommodate for grouping calculations. Returns additional information * old _prepare_pages() becomes _prepare_one_group() which stores pages belonging to one device group. * New _prepare_for_striping iterates on all groups calling _prepare_one_group(). * Enable mounting of groups data_maps (group_width != 0) [QUESTION] what is faster A or B; A. 
x += stride; x = x % width + first_x; B x += stride if (x < last_x) x = first_x; Signed-off-by: Boaz Harrosh --- fs/exofs/ios.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 106 insertions(+), 23 deletions(-) (limited to 'fs/exofs/ios.c') diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index d28febdf54ab..5293bc411d17 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -262,25 +262,50 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) /* * L - logical offset into the file * - * U - The number of bytes in a full stripe + * U - The number of bytes in a stripe within a group * * U = stripe_unit * group_width * - * N - The stripe number + * T - The number of bytes striped within a group of component objects + * (before advancing to the next group) * - * N = L / U + * T = stripe_unit * group_width * group_depth + * + * S - The number of bytes striped across all component objects + * before the pattern repeats + * + * S = stripe_unit * group_width * group_depth * group_count + * + * M - The "major" (i.e., across all components) stripe number + * + * M = L / S + * + * G - Counts the groups from the beginning of the major stripe + * + * G = (L - (M * S)) / T [or (L % S) / T] + * + * H - The byte offset within the group + * + * H = (L - (M * S)) % T [or (L % S) % T] + * + * N - The "minor" (i.e., across the group) stripe number + * + * N = H / U * * C - The component index coresponding to L * - * C = (L - (N*U)) / stripe_unit + * C = (H - (N * U)) / stripe_unit + G * group_width + * [or (L % U) / stripe_unit + G * group_width] * * O - The component offset coresponding to L * - * (N*stripe_unit)+(L%stripe_unit) + * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit */ - struct _striping_info { u64 obj_offset; + u64 group_length; + u64 total_group_length; + u64 Major; unsigned dev; unsigned unit_off; }; @@ -290,15 +315,35 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, { u32 stripe_unit 
= ios->layout->stripe_unit; u32 group_width = ios->layout->group_width; + u64 group_depth = ios->layout->group_depth; + u32 U = stripe_unit * group_width; + u64 T = U * group_depth; + u64 S = T * ios->layout->group_count; + u64 M = div64_u64(file_offset, S); + + /* + G = (L - (M * S)) / T + H = (L - (M * S)) % T + */ + u64 LmodS = file_offset - M * S; + u32 G = div64_u64(LmodS, T); + u64 H = LmodS - G * T; + + u32 N = div_u64(H, U); + + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; + si->dev *= ios->layout->mirrors_p1; - u32 LmodU; - u64 N = div_u64_rem(file_offset, U, &LmodU); + div_u64_rem(file_offset, stripe_unit, &si->unit_off); - si->unit_off = LmodU % stripe_unit; - si->obj_offset = N * stripe_unit + si->unit_off; - si->dev = LmodU / stripe_unit; - si->dev *= ios->layout->mirrors_p1; + si->obj_offset = si->unit_off + (N * stripe_unit) + + (M * group_depth * stripe_unit); + + si->group_length = T - H; + si->total_group_length = T; + si->Major = M; } static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, @@ -345,16 +390,17 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, return 0; } -static int _prepare_pages(struct exofs_io_state *ios, - struct _striping_info *si) +static int _prepare_one_group(struct exofs_io_state *ios, u64 length, + struct _striping_info *si, unsigned first_comp) { - u64 length = ios->length; unsigned stripe_unit = ios->layout->stripe_unit; unsigned mirrors_p1 = ios->layout->mirrors_p1; + unsigned devs_in_group = ios->layout->group_width * mirrors_p1; unsigned dev = si->dev; - unsigned comp = 0; - unsigned stripes = 0; - unsigned cur_pg = 0; + unsigned first_dev = dev - (dev % devs_in_group); + unsigned comp = first_comp + (dev - first_dev); + unsigned max_comp = ios->numdevs ? 
ios->numdevs - mirrors_p1 : 0; + unsigned cur_pg = ios->pages_consumed; int ret = 0; while (length) { @@ -377,10 +423,11 @@ static int _prepare_pages(struct exofs_io_state *ios, cur_len = stripe_unit; } - stripes++; + if (max_comp < comp) + max_comp = comp; dev += mirrors_p1; - dev %= ios->layout->s_numdevs; + dev = (dev % devs_in_group) + first_dev; } else { cur_len = stripe_unit; } @@ -393,18 +440,24 @@ static int _prepare_pages(struct exofs_io_state *ios, goto out; comp += mirrors_p1; - comp %= ios->layout->s_numdevs; + comp = (comp % devs_in_group) + first_comp; length -= cur_len; } out: - ios->numdevs = stripes * mirrors_p1; + ios->numdevs = max_comp + mirrors_p1; + ios->pages_consumed = cur_pg; return ret; } static int _prepare_for_striping(struct exofs_io_state *ios) { + u64 length = ios->length; struct _striping_info si; + unsigned devs_in_group = ios->layout->group_width * + ios->layout->mirrors_p1; + unsigned first_comp = 0; + int ret = 0; _calc_stripe_info(ios, ios->offset, &si); @@ -424,7 +477,31 @@ static int _prepare_for_striping(struct exofs_io_state *ios) return 0; } - return _prepare_pages(ios, &si); + while (length) { + if (length < si.group_length) + si.group_length = length; + + ret = _prepare_one_group(ios, si.group_length, &si, first_comp); + if (unlikely(ret)) + goto out; + + length -= si.group_length; + + si.group_length = si.total_group_length; + si.unit_off = 0; + ++si.Major; + si.obj_offset = si.Major * ios->layout->stripe_unit * + ios->layout->group_depth; + + si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group; + si.dev %= ios->layout->s_numdevs; + + first_comp += devs_in_group; + first_comp %= ios->layout->s_numdevs; + } + +out: + return ret; } int exofs_sbi_create(struct exofs_io_state *ios) @@ -482,6 +559,9 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) unsigned last_comp = cur_comp + ios->layout->mirrors_p1; int ret = 0; + if (ios->pages && !master_dev->length) + return 0; /* Just an empty slot */ 
+ for (; cur_comp < last_comp; ++cur_comp, ++dev) { struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; @@ -580,6 +660,9 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; unsigned first_dev = (unsigned)ios->obj.id; + if (ios->pages && !per_dev->length) + return 0; /* Just an empty slot */ + first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { -- cgit v1.2.3