diff options
Diffstat (limited to 'fs')
75 files changed, 1095 insertions, 925 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 475f9c597cb7..326dc08d3e3f 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -39,27 +39,17 @@ /* #define DEBUG */ -#ifdef DEBUG -#define DPRINTK(fmt, args...) \ -do { \ - printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \ - current->pid, __func__, ##args); \ -} while (0) -#else -#define DPRINTK(fmt, args...) do {} while (0) -#endif - -#define AUTOFS_WARN(fmt, args...) \ -do { \ +#define DPRINTK(fmt, ...) \ + pr_debug("pid %d: %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + +#define AUTOFS_WARN(fmt, ...) \ printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ - current->pid, __func__, ##args); \ -} while (0) + current->pid, __func__, ##__VA_ARGS__) -#define AUTOFS_ERROR(fmt, args...) \ -do { \ +#define AUTOFS_ERROR(fmt, ...) \ printk(KERN_ERR "pid %d: %s: " fmt "\n", \ - current->pid, __func__, ##args); \ -} while (0) + current->pid, __func__, ##__VA_ARGS__) /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 25435987d6ae..e1fbdeef85db 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, size_t pktsz; DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", - wq->wait_queue_token, wq->name.len, wq->name.name, type); + (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); memset(&pkt,0,sizeof pkt); /* For security reasons */ diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 54b8c28bebc8..720d885e8dca 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); link = ERR_PTR(-EIO); } else { - link[len - 1] = '\0'; + befs_debug(sb, "Follow long symlink"); + + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; + } } } else { link = befs_ino->i_data.symlink; diff --git a/fs/block_dev.c b/fs/block_dev.c index f28680553288..ff77262e887c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); int error; + + error = filemap_write_and_wait_range(filp->f_mapping, start, end); + if (error) + return error; /* * There is no need to serialise calls to blkdev_issue_flush with diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0469263e327e..03912c5c6f49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ u##bits res = le##bits##_to_cpu(p->member); \ - kunmap_atomic(p, KM_USER0); \ return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ p->member = cpu_to_le##bits(val); \ - kunmap_atomic(p, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ @@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref); +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 66bac226944e..80d6148f60ac 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, for (i = 0; i < multi->num_stripes; i++, stripe++) { + if (!stripe->dev->can_discard) + continue; + ret = btrfs_issue_discard(stripe->dev->bdev, stripe->physical, stripe->length); @@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) break; + + /* + * Just in case we get back EOPNOTSUPP for some reason, + * just ignore the return value so we don't screw up + * people calling discard_extent. + */ + ret = 0; } kfree(multi); } - if (discarded_bytes && ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; @@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref) +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out; + } wc = kzalloc(sizeof(*wc), GFP_NOFS); if (!wc) { btrfs_free_path(path); - return -ENOMEM; + err = -ENOMEM; + goto out; } trans = btrfs_start_transaction(tree_root, 0); @@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out; + goto out_free; } WARN_ON(ret > 0); @@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out: +out_free: btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); - return err; +out: + if (err) + btrfs_std_error(root->fs_info, err); + return; } /* @@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_space_info *space_info; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; + u64 min_free; + int index; + int dev_nr = 0; + int dev_min = 1; int full = 0; int ret = 0; @@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (!block_group) return -1; + min_free = btrfs_block_group_used(&block_group->item); + /* no bytes used, we're good */ - if (!btrfs_block_group_used(&block_group->item)) + if (!min_free) goto out; space_info = block_group->space_info; @@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * all of the extents from this block group. If we can, we're good */ if ((space_info->total_bytes != block_group->key.offset) && - (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - btrfs_block_group_used(&block_group->item) < - space_info->total_bytes)) { + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + min_free < space_info->total_bytes)) { spin_unlock(&space_info->lock); goto out; } @@ -6766,9 +6785,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (full) goto out; + /* + * index: + * 0: raid10 + * 1: raid1 + * 2: dup + * 3: raid0 + * 4: single + */ + index = get_block_group_index(block_group); + if (index == 0) { + dev_min = 4; + min_free /= 2; + } else if (index == 1) { + dev_min = 2; + } else if (index == 2) { + min_free *= 2; + } else if (index == 3) { + dev_min = fs_devices->rw_devices; + min_free /= dev_min; + } + mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { - u64 min_free = btrfs_block_group_used(&block_group->item); u64 dev_offset; /* @@ -6779,7 +6818,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) ret = find_free_dev_extent(NULL, device, min_free, &dev_offset, NULL); if (!ret) + dev_nr++; + + if (dev_nr >= dev_min) break; + ret = -1; } } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 658d66959abe..e7872e485f13 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) __btrfs_add_inode_defrag(inode, defrag); + else + kfree(defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); return 0; } @@ -1638,11 +1640,15 @@ static long btrfs_fallocate(struct file *file, int mode, cur_offset = alloc_start; while (1) { + u64 actual_end; + em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); + actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { @@ -1655,6 +1661,16 @@ static long btrfs_fallocate(struct file *file, int mode, free_extent_map(em); break; } + } else if (actual_end > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + /* + * We didn't need to allocate any more space, but we + * still extended the size of the file so we need to + * update i_size. + */ + inode->i_ctime = CURRENT_TIME; + i_size_write(inode, actual_end); + btrfs_ordered_update_i_size(inode, actual_end, NULL); } free_extent_map(em); @@ -1804,10 +1820,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) } } - if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - return -EINVAL; - if (offset > inode->i_sb->s_maxbytes) - return -EINVAL; + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { + ret = -EINVAL; + goto out; + } + if (offset > inode->i_sb->s_maxbytes) { + ret = -EINVAL; + goto out; + } /* Special lock needed here? */ if (offset != file->f_pos) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6377713f639c..6a265b9f85f2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } -static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, - struct btrfs_free_space *info, u64 offset, - u64 bytes) +static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + u64 offset, u64 bytes) { unsigned long start, count; @@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; +} + +static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + __bitmap_clear_bits(ctl, info, offset, bytes); ctl->free_space -= bytes; } @@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, return 0; ret = search_start; - bitmap_clear_bits(ctl, entry, ret, bytes); + __bitmap_clear_bits(ctl, entry, ret, bytes); return ret; } @@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, continue; } } else { - ret = entry->offset; entry->offset += bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 15fceefbca0a..0ccc7438ad34 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; + umode_t mode = inode->i_mode; - if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) - return -EROFS; - if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) - return -EACCES; + if (mask & MAY_WRITE && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + if (btrfs_root_readonly(root)) + return -EROFS; + if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) + return -EACCES; + } return generic_permission(inode, mask); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7cf013349941..970977aab224 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2236,6 +2236,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_wait_ordered_range(src, off, len); } + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, off, + ALIGN(off + len, PAGE_CACHE_SIZE) - 1); + /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65f8eda..786639fca067 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - int ret; struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *dir; struct inode *inode; - char *name; - int namelen; unsigned long ref_ptr; unsigned long ref_end; + char *name; + int namelen; + int ret; int search_done = 0; /* @@ -909,6 +910,25 @@ again: } btrfs_release_path(path); + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + + /* look for a conflicing name */ + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + insert: /* insert our name */ ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53875ae73ad4..f2a4cc79da61 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; + int sync_pending = 0; struct blk_plug plug; /* @@ -229,6 +230,22 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); + /* + * if we're doing the sync list, record that our + * plug has some sync requests on it + * + * If we're doing the regular list and there are + * sync requests sitting around, unplug before + * we add more + */ + if (pending_bios == &device->pending_sync_bios) { + sync_pending = 1; + } else if (sync_pending) { + blk_finish_plug(&plug); + blk_start_plug(&plug); + sync_pending = 0; + } + submit_bio(cur->bi_rw, cur); num_run++; batch_run++; @@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->rw_devices--; } + if (device->can_discard) + fs_devices->num_can_discard--; + new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); @@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; + new_device->can_discard = 0; list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder) { + struct request_queue *q; struct block_device *bdev; struct list_head *head = &fs_devices->devices; struct btrfs_device *device; @@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, seeding = 0; } + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) { + device->can_discard = 1; + fs_devices->num_can_discard++; + } + device->bdev = bdev; device->in_fs_metadata = 0; device->mode = flags; @@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, max_hole_start = search_start; max_hole_size = 0; + hole_size = 0; if (search_start >= search_end) { ret = -ENOSPC; @@ -917,7 +946,14 @@ next: cond_resched(); } - hole_size = search_end- search_start; + /* + * At this point, search_start should be the end of + * allocated dev extents, and when shrinking the device, + * search_end may be smaller than search_start. + */ + if (search_end > search_start) + hole_size = search_end - search_start; + if (hole_size > max_hole_size) { max_hole_start = search_start; max_hole_size = hole_size; @@ -1543,6 +1579,7 @@ error: int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { + struct request_queue *q; struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; @@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) + device->can_discard = 1; device->writeable = 1; device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); @@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; + if (device->can_discard) + root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; if (!blk_queue_nonrot(bdev_get_queue(bdev))) @@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, total_avail = device->total_bytes - device->bytes_used; else total_avail = 0; - /* avail is off by max(alloc_start, 1MB), but that is the same - * for all devices, so it doesn't hurt the sorting later on - */ + + /* If there is no space on this device, skip it. */ + if (total_avail == 0) + continue; ret = find_free_dev_extent(trans, device, max_stripe_size * dev_stripes, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7ae..6d866db4e177 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -48,6 +48,7 @@ struct btrfs_device { int writeable; int in_fs_metadata; int missing; + int can_discard; spinlock_t io_lock; @@ -104,6 +105,7 @@ struct btrfs_fs_devices { u64 rw_devices; u64 missing_devices; u64 total_rw_bytes; + u64 num_can_discard; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2fe3cf13b2e9..6d40656e1e29 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_CIFS_STATS2 seq_printf(m, " In Send: %d In MaxReq Wait: %d", - atomic_read(&server->inSend), + atomic_read(&server->in_send), atomic_read(&server->num_waiters)); #endif diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 8d8f28c94c0f..6873bb634a97 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata, rc = dns_resolve_server_name_to_ip(*devname, &srvIP); if (rc < 0) { - cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", - __func__, *devname, rc); + cFYI(1, "%s: Failed to resolve server part of %s to IP: %d", + __func__, *devname, rc); goto compose_mount_options_err; } + /* md_len = strlen(...) + 12 for 'sep+prefixpath=' * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 21de1d6d5849..d0f59faefb78 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, return pntsd; } -static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, - struct cifs_ntsd *pnntsd, u32 acllen) -{ - int xid, rc; - struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - - if (IS_ERR(tlink)) - return PTR_ERR(tlink); - - xid = GetXid(); - rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); - FreeXid(xid); - cifs_put_tlink(tlink); - - cFYI(DBG2, "SetCIFSACL rc = %d", rc); - return rc; -} - static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, struct cifs_ntsd *pnntsd, u32 acllen) { @@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifsFileInfo *open_file; - int rc; cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); - open_file = find_readable_file(CIFS_I(inode), true); - if (!open_file) - return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); - - rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); - cifsFileInfo_put(open_file); - return rc; + return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 212e5629cc1d..f93eb948d071 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -563,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) mutex_unlock(&dir->i_mutex); dput(dentry); dentry = child; + if (!dentry->d_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + } } while (!IS_ERR(dentry)); _FreeXid(xid); kfree(full_path); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cb71dc1f94d1..95da8027983d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.74" +#define CIFS_VERSION "1.75" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38ce6d44b145..95dad9d14cf1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -291,7 +291,7 @@ struct TCP_Server_Info { struct fscache_cookie *fscache; /* client index cache cookie */ #endif #ifdef CONFIG_CIFS_STATS2 - atomic_t inSend; /* requests trying to send */ + atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ #endif }; @@ -672,12 +672,54 @@ struct mid_q_entry { bool multiEnd:1; /* both received */ }; -struct oplock_q_entry { - struct list_head qhead; - struct inode *pinode; - struct cifs_tcon *tcon; - __u16 netfid; -}; +/* Make code in transport.c a little cleaner by moving + update of optional stats into function below */ +#ifdef CONFIG_CIFS_STATS2 + +static inline void cifs_in_send_inc(struct TCP_Server_Info *server) +{ + atomic_inc(&server->in_send); +} + +static inline void cifs_in_send_dec(struct TCP_Server_Info *server) +{ + atomic_dec(&server->in_send); +} + +static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) +{ + atomic_inc(&server->num_waiters); +} + +static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) +{ + atomic_dec(&server->num_waiters); +} + +static inline void cifs_save_when_sent(struct mid_q_entry *mid) +{ + mid->when_sent = jiffies; +} +#else +static inline void cifs_in_send_inc(struct TCP_Server_Info *server) +{ +} +static inline void cifs_in_send_dec(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_save_when_sent(struct mid_q_entry *mid) +{ +} +#endif /* for pending dnotify requests */ struct dir_notify_req { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80c2e3add3a2..633c246b6775 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); - kfree(volume_info->UNCip); + if (volume_info->UNCip != volume_info->UNC + 2) + kfree(volume_info->UNCip); kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ae576fbb5142..72d448bf96ce 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -105,8 +105,8 @@ cifs_bp_rename_retry: } rcu_read_unlock(); if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { - cERROR(1, "did not end path lookup where expected namelen is %d", - namelen); + cFYI(1, "did not end path lookup where expected. namelen=%d " + "dfsplen=%d", namelen, dfsplen); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 548f06230a6d..1d2d91d9bf65 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) /* Perform the upcall */ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); if (rc < 0) - cERROR(1, "%s: unable to resolve: %*.*s", - __func__, len, len, hostname); + cFYI(1, "%s: unable to resolve: %*.*s", + __func__, len, len, hostname); else cFYI(1, "%s: resolved: %*.*s to %s", __func__, len, len, hostname, *ip_addr); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9b018c8334fa..a7b2dcd4a53e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, if (full_path == NULL) return full_path; - if (dfsplen) { + if (dfsplen) strncpy(full_path, tcon->treeName, dfsplen); - /* switch slash direction in prepath depending on whether - * windows or posix style path names - */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { - int i; - for (i = 0; i < dfsplen; i++) { - if (full_path[i] == '\\') - full_path[i] = '/'; - } - } - } strncpy(full_path + dfsplen, vol->prepath, pplen); + convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); full_path[dfsplen + pplen] = 0; /* add trailing null */ return full_path; } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 243d58720513..d3e619692ee0 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -124,8 +124,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB) /* that we use in next few lines */ /* Note that header is initialized to zero in header_assemble */ pSMB->req.AndXCommand = 0xFF; - pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize - 4, - USHRT_MAX)); + pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); pSMB->req.VcNumber = get_next_vcnum(ses); diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 147aa22c3c3a..10ca6b2c26b7 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server, while (1) { if (atomic_read(&server->inFlight) >= cifs_max_pending) { spin_unlock(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&server->num_waiters); -#endif + cifs_num_waiters_inc(server); wait_event(server->request_q, atomic_read(&server->inFlight) < cifs_max_pending); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&server->num_waiters); -#endif + cifs_num_waiters_dec(server); spin_lock(&GlobalMid_Lock); } else { if (server->tcpStatus == CifsExiting) { @@ -362,6 +358,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid = AllocMidQEntry(hdr, server); if (mid == NULL) { mutex_unlock(&server->srv_mutex); + atomic_dec(&server->inFlight); + wake_up(&server->request_q); return -ENOMEM; } @@ -379,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid->callback = callback; mid->callback_data = cbdata; mid->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&server->inSend); -#endif + + cifs_in_send_inc(server); rc = smb_sendv(server, iov, nvec); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&server->inSend); - mid->when_sent = jiffies; -#endif + cifs_in_send_dec(server); + cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); + if (rc) goto out_err; @@ -573,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + cifs_in_send_inc(ses->server); rc = smb_sendv(ses->server, iov, n_vec); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); @@ -701,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + + cifs_in_send_inc(ses->server); rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); if (rc < 0) @@ -841,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + cifs_in_send_inc(ses->server); rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 8be086e9abe4..51352de88ef1 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1003,6 +1003,7 @@ COMPATIBLE_IOCTL(PPPIOCCONNECT) COMPATIBLE_IOCTL(PPPIOCDISCONN) COMPATIBLE_IOCTL(PPPIOCATTCHAN) COMPATIBLE_IOCTL(PPPIOCGCHAN) +COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) /* PPPOX */ COMPATIBLE_IOCTL(PPPOEIOCSFWD) COMPATIBLE_IOCTL(PPPOEIOCDFWD) diff --git a/fs/dcache.c b/fs/dcache.c index c83cae19161e..a88948b8bd17 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1729,7 +1729,7 @@ seqretry: */ if (read_seqcount_retry(&dentry->d_seq, *seq)) goto seqretry; - if (parent->d_flags & DCACHE_OP_COMPARE) { + if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { if (parent->d_op->d_compare(parent, *inode, dentry, i, tlen, tname, name)) diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 1cd6d9d3e29a..cc16562654de 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -1,6 +1,6 @@ config ECRYPT_FS tristate "eCrypt filesystem layer support (EXPERIMENTAL)" - depends on EXPERIMENTAL && KEYS && CRYPTO + depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) select CRYPTO_ECB select CRYPTO_CBC select CRYPTO_MD5 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 08a2b52bf565..ac1ad48c2376 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1973,7 +1973,7 @@ pki_encrypt_session_key(struct key *auth_tok_key, { struct ecryptfs_msg_ctx *msg_ctx = NULL; char *payload = NULL; - size_t payload_len; + size_t payload_len = 0; struct ecryptfs_message *msg; int rc; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9f1bb747d77d..b4a6befb1216 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only, + ecryptfs_opt_check_dev_ruid, ecryptfs_opt_err }; static const match_table_t tokens = { @@ -191,6 +192,7 @@ static const match_table_t tokens = { {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"}, + {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"}, {ecryptfs_opt_err, NULL} }; @@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat( * ecryptfs_parse_options * @sb: The ecryptfs super block * @options: The options passed to the kernel + * @check_ruid: set to 1 if device uid should be checked against the ruid * * Parse mount options: * debug=N - ecryptfs_verbosity level for debug output @@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat( * * Returns zero on success; non-zero on error */ -static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) +static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, + uid_t *check_ruid) { char *p; int rc = 0; @@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + *check_ruid = 0; + if (!options) { rc = -EINVAL; goto out; @@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options) mount_crypt_stat->flags |= ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY; break; + case ecryptfs_opt_check_dev_ruid: + *check_ruid = 1; + break; case ecryptfs_opt_err: default: printk(KERN_WARNING @@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags const char *err = "Getting sb failed"; struct inode *inode; struct path path; + uid_t check_ruid; int rc; sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL); @@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - rc = ecryptfs_parse_options(sbi, raw_data); + rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid); if (rc) { err = "Error parsing options"; goto out; @@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags "known incompatibilities\n"); goto out_free; } + + if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { + rc = -EPERM; + printk(KERN_ERR "Mount of device (uid: %d) not owned by " + "requested user (uid: %d)\n", + path.dentry->d_inode->i_uid, current_uid()); + goto out_free; + } + ecryptfs_set_superblock_lower(s, path.dentry->d_sb); s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 85d430963116..3745f7c2b9c2 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -39,15 +39,16 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size) { - struct ecryptfs_inode_info *inode_info; + struct file *lower_file; mm_segment_t fs_save; ssize_t rc; - inode_info = ecryptfs_inode_to_private(ecryptfs_inode); - BUG_ON(!inode_info->lower_file); + lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; fs_save = get_fs(); set_fs(get_ds()); - rc = vfs_write(inode_info->lower_file, data, size, &offset); + rc = vfs_write(lower_file, data, size, &offset); set_fs(fs_save); mark_inode_dirty_sync(ecryptfs_inode); return rc; @@ -225,15 +226,16 @@ out: int ecryptfs_read_lower(char *data, loff_t offset, size_t size, struct inode *ecryptfs_inode) { - struct ecryptfs_inode_info *inode_info = - ecryptfs_inode_to_private(ecryptfs_inode); + struct file *lower_file; mm_segment_t fs_save; ssize_t rc; - BUG_ON(!inode_info->lower_file); + lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; fs_save = get_fs(); set_fs(get_ds()); - rc = vfs_read(inode_info->lower_file, data, size, &offset); + rc = vfs_read(lower_file, data, size, &offset); set_fs(fs_save); return rc; } diff --git a/fs/exec.c b/fs/exec.c index da80612a35f4..25dcbe5fc356 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1459,6 +1459,23 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; bool clear_in_exec; int retval; + const struct cred *cred = current_cred(); + + /* + * We move the actual failure in case of RLIMIT_NPROC excess from + * set*uid() to execve() because too many poorly written programs + * don't check setuid() return code. Here we additionally recheck + * whether NPROC limit is still exceeded. + */ + if ((current->flags & PF_NPROC_EXCEEDED) && + atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { + retval = -EAGAIN; + goto out_ret; + } + + /* We're below the limit (still or again), so we don't want to make + * further execve() calls fail. */ + current->flags &= ~PF_NPROC_EXCEEDED; retval = unshare_files(&displaced); if (retval) diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index 2d0f757fda3e..c5a5855a6c44 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild @@ -12,5 +12,8 @@ # Kbuild - Gets included from the Kernels Makefile and build system # -exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o +# ore module library +obj-$(CONFIG_ORE) += ore.o + +exofs-y := inode.o file.o symlink.o namei.o dir.o super.o obj-$(CONFIG_EXOFS_FS) += exofs.o diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index 86194b2f799d..70bae4149291 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig @@ -1,6 +1,10 @@ +config ORE + tristate + config EXOFS_FS tristate "exofs: OSD based file system support" depends on SCSI_OSD_ULD + select ORE help EXOFS is a file system that uses an OSD storage device, as its backing storage. diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index c965806c2821..f4e442ec7445 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -36,12 +36,9 @@ #include <linux/fs.h> #include <linux/time.h> #include <linux/backing-dev.h> -#include "common.h" +#include <scsi/osd_ore.h> -/* FIXME: Remove once pnfs hits mainline - * #include <linux/exportfs/pnfs_osd_xdr.h> - */ -#include "pnfs.h" +#include "common.h" #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) @@ -56,27 +53,11 @@ /* u64 has problems with printk this will cast it to unsigned long long */ #define _LLU(x) (unsigned long long)(x) -struct exofs_layout { - osd_id s_pid; /* partition ID of file system*/ - - /* Our way of looking at the data_map */ - unsigned stripe_unit; - unsigned mirrors_p1; - - unsigned group_width; - u64 group_depth; - unsigned group_count; - - enum exofs_inode_layout_gen_functions lay_func; - - unsigned s_numdevs; /* Num of devices in array */ - struct osd_dev *s_ods[0]; /* Variable length */ -}; - /* * our extension to the in-memory superblock */ struct exofs_sb_info { + struct backing_dev_info bdi; /* register our bdi with VFS */ struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ int s_timeout; /* timeout for OSD operations */ uint64_t s_nextid; /* highest object ID used */ @@ -84,16 +65,13 @@ struct exofs_sb_info { spinlock_t s_next_gen_lock; /* spinlock for gen # update */ u32 s_next_generation; /* next gen # to use */ atomic_t s_curr_pending; /* number of pending commands */ - uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ - struct backing_dev_info bdi; /* register our bdi with VFS */ struct pnfs_osd_data_map data_map; /* Default raid to use * FIXME: Needed ? */ -/* struct exofs_layout dir_layout;*/ /* Default dir layout */ - struct exofs_layout layout; /* Default files layout, - * contains the variable osd_dev - * array. Keep last */ + struct ore_layout layout; /* Default files layout */ + struct ore_comp one_comp; /* id & cred of partition id=0*/ + struct ore_components comps; /* comps for the partition */ struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ }; @@ -107,7 +85,8 @@ struct exofs_i_info { uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ uint32_t i_dir_start_lookup; /* which page to start lookup */ uint64_t i_commit_size; /* the object's written length */ - uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ + struct ore_comp one_comp; /* same component for all devices */ + struct ore_components comps; /* inode view of the device table */ }; static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) @@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; } -struct exofs_io_state; -typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private); - -struct exofs_io_state { - struct kref kref; - - void *private; - exofs_io_done_fn done; - - struct exofs_layout *layout; - struct osd_obj_id obj; - u8 *cred; - - /* Global read/write IO*/ - loff_t offset; - unsigned long length; - void *kern_buff; - - struct page **pages; - unsigned nr_pages; - unsigned pgbase; - unsigned pages_consumed; - - /* Attributes */ - unsigned in_attr_len; - struct osd_attr *in_attr; - unsigned out_attr_len; - struct osd_attr *out_attr; - - /* Variable array of size numdevs */ - unsigned numdevs; - struct exofs_per_dev_state { - struct osd_request *or; - struct bio *bio; - loff_t offset; - unsigned length; - unsigned dev; - } per_dev[]; -}; - -static inline unsigned exofs_io_state_size(unsigned numdevs) -{ - return sizeof(struct exofs_io_state) + - sizeof(struct exofs_per_dev_state) * numdevs; -} - /* * our inode flags */ @@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) } /* - * Given a layout, object_number and stripe_index return the associated global - * dev_index - */ -unsigned exofs_layout_od_id(struct exofs_layout *layout, - osd_id obj_no, unsigned layout_index); -/* * Maximum count of links to a file */ #define EXOFS_LINK_MAX 32000 @@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout, * function declarations * *************************/ -/* ios.c */ -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], - const struct osd_obj_id *obj); -int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, - u64 offset, void *p, unsigned length); - -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_io_state **ios); -void exofs_put_io_state(struct exofs_io_state *ios); - -int exofs_check_io(struct exofs_io_state *ios, u64 *resid); - -int exofs_sbi_create(struct exofs_io_state *ios); -int exofs_sbi_remove(struct exofs_io_state *ios); -int exofs_sbi_write(struct exofs_io_state *ios); -int exofs_sbi_read(struct exofs_io_state *ios); - -int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr); - -int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); -static inline int exofs_oi_write(struct exofs_i_info *oi, - struct exofs_io_state *ios) -{ - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; - return exofs_sbi_write(ios); -} - -static inline int exofs_oi_read(struct exofs_i_info *oi, - struct exofs_io_state *ios) -{ - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; - return exofs_sbi_read(ios); -} - /* inode.c */ -unsigned exofs_max_io_pages(struct exofs_layout *layout, +unsigned exofs_max_io_pages(struct ore_layout *layout, unsigned expected_pages); int exofs_setattr(struct dentry *, struct iattr *); int exofs_write_begin(struct file *file, struct address_space *mapping, @@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, struct inode *); /* super.c */ +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], + const struct osd_obj_id *obj); int exofs_sbi_write_stats(struct exofs_sb_info *sbi); /********************* @@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations; /* inode.c */ extern const struct address_space_operations exofs_aops; -extern const struct osd_attr g_attr_logical_length; /* namei.c */ extern const struct inode_operations exofs_dir_inode_operations; @@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations; extern const struct inode_operations exofs_symlink_inode_operations; extern const struct inode_operations exofs_fast_symlink_inode_operations; +/* exofs_init_comps will initialize an ore_components device array + * pointing to a single ore_comp struct, and a round-robin view + * of the device table. + * The first device of each inode is the [inode->ino % num_devices] + * and the rest of the devices sequentially following where the + * first device is after the last device. + * It is assumed that the global device array at @sbi is twice + * bigger and that the device table repeats twice. + * See: exofs_read_lookup_dev_table() + */ +static inline void exofs_init_comps(struct ore_components *comps, + struct ore_comp *one_comp, + struct exofs_sb_info *sbi, osd_id oid) +{ + unsigned dev_mod = (unsigned)oid, first_dev; + + one_comp->obj.partition = sbi->one_comp.obj.partition; + one_comp->obj.id = oid; + exofs_make_credential(one_comp->cred, &one_comp->obj); + + comps->numdevs = sbi->comps.numdevs; + comps->single_comp = EC_SINGLE_COMP; + comps->comps = one_comp; + + /* Round robin device view of the table */ + first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs; + comps->ods = sbi->comps.ods + first_dev; +} + #endif diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 8472c098445d..f39a38fc2349 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), }; -unsigned exofs_max_io_pages(struct exofs_layout *layout, +unsigned exofs_max_io_pages(struct ore_layout *layout, unsigned expected_pages) { unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); @@ -58,7 +58,7 @@ struct page_collect { struct exofs_sb_info *sbi; struct inode *inode; unsigned expected_pages; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct page **pages; unsigned alloc_pages; @@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol) { unsigned pages; - if (!pcol->ios) { /* First time allocate io_state */ - int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); - - if (ret) - return ret; - } - /* TODO: easily support bio chaining */ pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); @@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol) pcol->pages = NULL; if (pcol->ios) { - exofs_put_io_state(pcol->ios); + ore_put_io_state(pcol->ios); pcol->ios = NULL; } } @@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol) u64 resid; u64 good_bytes; u64 length = 0; - int ret = exofs_check_io(pcol->ios, &resid); + int ret = ore_check_io(pcol->ios, &resid); if (likely(!ret)) good_bytes = pcol->length; @@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol) } /* callback of async reads */ -static void readpages_done(struct exofs_io_state *ios, void *p) +static void readpages_done(struct ore_io_state *ios, void *p) { struct page_collect *pcol = p; @@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) static int read_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios = pcol->ios; + struct ore_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; if (!pcol->pages) return 0; + if (!pcol->ios) { + int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true, + pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->length, &pcol->ios); + + if (ret) + return ret; + } + + ios = pcol->ios; ios->pages = pcol->pages; ios->nr_pages = pcol->nr_pages; - ios->length = pcol->length; - ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; if (pcol->read_4_write) { - exofs_oi_read(oi, pcol->ios); + ore_read(pcol->ios); return __readpages_done(pcol); } @@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol) *pcol_copy = *pcol; ios->done = readpages_done; ios->private = pcol_copy; - ret = exofs_oi_read(oi, ios); + ret = ore_read(ios); if (unlikely(ret)) goto err; atomic_inc(&pcol->sbi->s_curr_pending); EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", - ios->obj.id, _LLU(ios->offset), pcol->length); + oi->one_comp.obj.id, _LLU(ios->offset), pcol->length); /* pages ownership was passed to pcol_copy */ _pcol_reset(pcol); @@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page) } /* Callback for osd_write. All writes are asynchronous */ -static void writepages_done(struct exofs_io_state *ios, void *p) +static void writepages_done(struct ore_io_state *ios, void *p) { struct page_collect *pcol = p; int i; u64 resid; u64 good_bytes; u64 length = 0; - int ret = exofs_check_io(ios, &resid); + int ret = ore_check_io(ios, &resid); atomic_dec(&pcol->sbi->s_curr_pending); @@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p) static int write_exec(struct page_collect *pcol) { struct exofs_i_info *oi = exofs_i(pcol->inode); - struct exofs_io_state *ios = pcol->ios; + struct ore_io_state *ios; struct page_collect *pcol_copy = NULL; int ret; if (!pcol->pages) return 0; + BUG_ON(pcol->ios); + ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false, + pcol->pg_first << PAGE_CACHE_SHIFT, + pcol->length, &pcol->ios); + + if (unlikely(ret)) + goto err; + pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); if (!pcol_copy) { EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); @@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol) *pcol_copy = *pcol; + ios = pcol->ios; ios->pages = pcol_copy->pages; ios->nr_pages = pcol_copy->nr_pages; - ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; - ios->length = pcol_copy->length; ios->done = writepages_done; ios->private = pcol_copy; - ret = exofs_oi_write(oi, ios); + ret = ore_write(ios); if (unlikely(ret)) { - EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); + EXOFS_ERR("write_exec: ore_write() Failed\n"); goto err; } @@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); } -const struct osd_attr g_attr_logical_length = ATTR_DEF( - OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); - static int _do_truncate(struct inode *inode, loff_t newsize) { struct exofs_i_info *oi = exofs_i(inode); + struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; int ret; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ret = exofs_oi_truncate(oi, (u64)newsize); + ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize); if (likely(!ret)) truncate_setsize(inode, newsize); @@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, [1] = g_attr_inode_file_layout, [2] = g_attr_inode_dir_layout, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct exofs_on_disk_inode_layout *layout; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } - ios->obj.id = exofs_oi_objno(oi); - exofs_make_credential(oi->i_cred, &ios->obj); - ios->cred = oi->i_cred; - - attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); - attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); + attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); + attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs); ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) { EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", - _LLU(ios->obj.id), ret); + _LLU(oi->one_comp.obj.id), ret); memset(inode, 0, sizeof(*inode)); inode->i_mode = 0040000 | (0777 & ~022); /* If object is lost on target we might as well enable it's @@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, } out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } @@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) return inode; oi = exofs_i(inode); __oi_init(oi); + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, + exofs_oi_objno(oi)); /* read the inode from the osd */ ret = exofs_get_inode(sb, oi, &fcb); @@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) * set the obj_created flag so that other methods know that the object exists on * the OSD. */ -static void create_done(struct exofs_io_state *ios, void *p) +static void create_done(struct ore_io_state *ios, void *p) { struct inode *inode = p; struct exofs_i_info *oi = exofs_i(inode); struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; int ret; - ret = exofs_check_io(ios, NULL); - exofs_put_io_state(ios); + ret = ore_check_io(ios, NULL); + ore_put_io_state(ios); atomic_dec(&sbi->s_curr_pending); if (unlikely(ret)) { EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", - _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); + _LLU(exofs_oi_objno(oi)), + _LLU(oi->one_comp.obj.partition)); /*TODO: When FS is corrupted creation can fail, object already * exist. Get rid of this asynchronous creation, if exist * increment the obj counter and try the next object. Until we @@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p) */ struct inode *exofs_new_inode(struct inode *dir, int mode) { - struct super_block *sb; + struct super_block *sb = dir->i_sb; + struct exofs_sb_info *sbi = sb->s_fs_info; struct inode *inode; struct exofs_i_info *oi; - struct exofs_sb_info *sbi; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; - sb = dir->i_sb; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) set_obj_2bcreated(oi); - sbi = sb->s_fs_info; - inode->i_mapping->backing_dev_info = sb->s_bdi; inode_init_owner(inode, dir, mode); inode->i_ino = sbi->s_nextid++; @@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) spin_unlock(&sbi->s_next_gen_lock); insert_inode_hash(inode); + exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info, + exofs_oi_objno(oi)); exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ mark_inode_dirty(inode); - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); + EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n"); return ERR_PTR(ret); } - ios->obj.id = exofs_oi_objno(oi); - exofs_make_credential(oi->i_cred, &ios->obj); - ios->done = create_done; ios->private = inode; - ios->cred = oi->i_cred; - ret = exofs_sbi_create(ios); + + ret = ore_create(ios); if (ret) { - exofs_put_io_state(ios); + ore_put_io_state(ios); return ERR_PTR(ret); } atomic_inc(&sbi->s_curr_pending); @@ -1207,11 +1208,11 @@ struct updatei_args { /* * Callback function from exofs_update_inode(). */ -static void updatei_done(struct exofs_io_state *ios, void *p) +static void updatei_done(struct ore_io_state *ios, void *p) { struct updatei_args *args = p; - exofs_put_io_state(ios); + ore_put_io_state(ios); atomic_dec(&args->sbi->s_curr_pending); @@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) struct exofs_i_info *oi = exofs_i(inode); struct super_block *sb = inode->i_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct osd_attr attr; struct exofs_fcb *fcb; struct updatei_args *args; @@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync) } else memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); goto free_args; } @@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync) ios->private = args; } - ret = exofs_oi_write(oi, ios); + ret = ore_write(ios); if (!do_sync && !ret) { atomic_inc(&sbi->s_curr_pending); goto out; /* deallocation in updatei_done */ } - exofs_put_io_state(ios); + ore_put_io_state(ios); free_args: kfree(args); out: @@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) * Callback function from exofs_delete_inode() - don't have much cleaning up to * do. */ -static void delete_done(struct exofs_io_state *ios, void *p) +static void delete_done(struct ore_io_state *ios, void *p) { struct exofs_sb_info *sbi = p; - exofs_put_io_state(ios); + ore_put_io_state(ios); atomic_dec(&sbi->s_curr_pending); } @@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode) struct exofs_i_info *oi = exofs_i(inode); struct super_block *sb = inode->i_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode) /* ignore the error, attempt a remove anyway */ /* Now Remove the OSD objects */ - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed\n", __func__); return; } - ios->obj.id = exofs_oi_objno(oi); ios->done = delete_done; ios->private = sbi; - ios->cred = oi->i_cred; - ret = exofs_sbi_remove(ios); + + ret = ore_remove(ios); if (ret) { - EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); - exofs_put_io_state(ios); + EXOFS_ERR("%s: ore_remove failed\n", __func__); + ore_put_io_state(ios); return; } atomic_inc(&sbi->s_curr_pending); diff --git a/fs/exofs/ios.c b/fs/exofs/ore.c index f74a2ec027a6..25305af88198 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ore.c @@ -23,81 +23,87 @@ */ #include <linux/slab.h> -#include <scsi/scsi_device.h> #include <asm/div64.h> -#include "exofs.h" +#include <scsi/osd_ore.h> -#define EXOFS_DBGMSG2(M...) do {} while (0) -/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */ +#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a) -void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) -{ - osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); -} +#ifdef CONFIG_EXOFS_DEBUG +#define ORE_DBGMSG(fmt, a...) \ + printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a) +#else +#define ORE_DBGMSG(fmt, a...) \ + do { if (0) printk(fmt, ##a); } while (0) +#endif -int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, - u64 offset, void *p, unsigned length) -{ - struct osd_request *or = osd_start_request(od, GFP_KERNEL); -/* struct osd_sense_info osi = {.key = 0};*/ - int ret; +/* u64 has problems with printk this will cast it to unsigned long long */ +#define _LLU(x) (unsigned long long)(x) - if (unlikely(!or)) { - EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); - return -ENOMEM; - } - ret = osd_req_read_kern(or, obj, offset, p, length); - if (unlikely(ret)) { - EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); - goto out; - } +#define ORE_DBGMSG2(M...) do {} while (0) +/* #define ORE_DBGMSG2 ORE_DBGMSG */ - ret = osd_finalize_request(or, 0, cred, NULL); - if (unlikely(ret)) { - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); - goto out; - } +MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); +MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); +MODULE_LICENSE("GPL"); - ret = osd_execute_request(or); - if (unlikely(ret)) - EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); - /* osd_req_decode_sense(or, ret); */ +static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) +{ + return ios->comps->comps[index & ios->comps->single_comp].cred; +} -out: - osd_end_request(or); - return ret; +static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) +{ + return &ios->comps->comps[index & ios->comps->single_comp].obj; } -int exofs_get_io_state(struct exofs_layout *layout, - struct exofs_io_state **pios) +static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) { - struct exofs_io_state *ios; + return ios->comps->ods[index]; +} + +int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps, + bool is_reading, u64 offset, u64 length, + struct ore_io_state **pios) +{ + struct ore_io_state *ios; /*TODO: Maybe use kmem_cach per sbi of size * exofs_io_state_size(layout->s_numdevs) */ - ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); + ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL); if (unlikely(!ios)) { - EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", - exofs_io_state_size(layout->s_numdevs)); + ORE_DBGMSG("Failed kzalloc bytes=%d\n", + ore_io_state_size(comps->numdevs)); *pios = NULL; return -ENOMEM; } ios->layout = layout; - ios->obj.partition = layout->s_pid; + ios->comps = comps; + ios->offset = offset; + ios->length = length; + ios->reading = is_reading; + *pios = ios; return 0; } +EXPORT_SYMBOL(ore_get_rw_state); + +int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps, + struct ore_io_state **ios) +{ + return ore_get_rw_state(layout, comps, true, 0, 0, ios); +} +EXPORT_SYMBOL(ore_get_io_state); -void exofs_put_io_state(struct exofs_io_state *ios) +void ore_put_io_state(struct ore_io_state *ios) { if (ios) { unsigned i; for (i = 0; i < ios->numdevs; i++) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; + struct ore_per_dev_state *per_dev = &ios->per_dev[i]; if (per_dev->or) osd_end_request(per_dev->or); @@ -108,31 +114,9 @@ void exofs_put_io_state(struct exofs_io_state *ios) kfree(ios); } } +EXPORT_SYMBOL(ore_put_io_state); -unsigned exofs_layout_od_id(struct exofs_layout *layout, - osd_id obj_no, unsigned layout_index) -{ -/* switch (layout->lay_func) { - case LAYOUT_MOVING_WINDOW: - {*/ - unsigned dev_mod = obj_no; - - return (layout_index + dev_mod * layout->mirrors_p1) % - layout->s_numdevs; -/* } - case LAYOUT_FUNC_IMPLICT: - return layout->devs[layout_index]; - }*/ -} - -static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios, - unsigned layout_index) -{ - return ios->layout->s_ods[ - exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)]; -} - -static void _sync_done(struct exofs_io_state *ios, void *p) +static void _sync_done(struct ore_io_state *ios, void *p) { struct completion *waiting = p; @@ -141,20 +125,20 @@ static void _sync_done(struct exofs_io_state *ios, void *p) static void _last_io(struct kref *kref) { - struct exofs_io_state *ios = container_of( - kref, struct exofs_io_state, kref); + struct ore_io_state *ios = container_of( + kref, struct ore_io_state, kref); ios->done(ios, ios->private); } static void _done_io(struct osd_request *or, void *p) { - struct exofs_io_state *ios = p; + struct ore_io_state *ios = p; kref_put(&ios->kref, _last_io); } -static int exofs_io_execute(struct exofs_io_state *ios) +static int ore_io_execute(struct ore_io_state *ios) { DECLARE_COMPLETION_ONSTACK(wait); bool sync = (ios->done == NULL); @@ -170,9 +154,9 @@ static int exofs_io_execute(struct exofs_io_state *ios) if (unlikely(!or)) continue; - ret = osd_finalize_request(or, 0, ios->cred, NULL); + ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); if (unlikely(ret)) { - EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", + ORE_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); return ret; } @@ -194,7 +178,7 @@ static int exofs_io_execute(struct exofs_io_state *ios) if (sync) { wait_for_completion(&wait); - ret = exofs_check_io(ios, NULL); + ret = ore_check_io(ios, NULL); } return ret; } @@ -214,7 +198,7 @@ static void _clear_bio(struct bio *bio) } } -int exofs_check_io(struct exofs_io_state *ios, u64 *resid) +int ore_check_io(struct ore_io_state *ios, u64 *resid) { enum osd_err_priority acumulated_osd_err = 0; int acumulated_lin_err = 0; @@ -235,7 +219,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { /* start read offset passed endof file */ _clear_bio(ios->per_dev[i].bio); - EXOFS_DBGMSG("start read offset passed end of file " + ORE_DBGMSG("start read offset passed end of file " "offset=0x%llx, length=0x%llx\n", _LLU(ios->per_dev[i].offset), _LLU(ios->per_dev[i].length)); @@ -259,6 +243,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) return acumulated_lin_err; } +EXPORT_SYMBOL(ore_check_io); /* * L - logical offset into the file @@ -305,20 +290,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) struct _striping_info { u64 obj_offset; u64 group_length; + u64 M; /* for truncate */ unsigned dev; unsigned unit_off; }; -static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, +static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset, struct _striping_info *si) { - u32 stripe_unit = ios->layout->stripe_unit; - u32 group_width = ios->layout->group_width; - u64 group_depth = ios->layout->group_depth; + u32 stripe_unit = layout->stripe_unit; + u32 group_width = layout->group_width; + u64 group_depth = layout->group_depth; u32 U = stripe_unit * group_width; u64 T = U * group_depth; - u64 S = T * ios->layout->group_count; + u64 S = T * layout->group_count; u64 M = div64_u64(file_offset, S); /* @@ -333,7 +319,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, /* "H - (N * U)" is just "H % U" so it's bound to u32 */ si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; - si->dev *= ios->layout->mirrors_p1; + si->dev *= layout->mirrors_p1; div_u64_rem(file_offset, stripe_unit, &si->unit_off); @@ -341,15 +327,16 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, (M * group_depth * stripe_unit); si->group_length = T - H; + si->M = M; } -static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, - unsigned pgbase, struct exofs_per_dev_state *per_dev, +static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, + unsigned pgbase, struct ore_per_dev_state *per_dev, int cur_len) { unsigned pg = *cur_pg; struct request_queue *q = - osd_request_queue(exofs_ios_od(ios, per_dev->dev)); + osd_request_queue(_ios_od(ios, per_dev->dev)); per_dev->length += cur_len; @@ -361,7 +348,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); if (unlikely(!per_dev->bio)) { - EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", + ORE_DBGMSG("Failed to allocate BIO size=%u\n", bio_size); return -ENOMEM; } @@ -387,7 +374,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, return 0; } -static int _prepare_one_group(struct exofs_io_state *ios, u64 length, +static int _prepare_one_group(struct ore_io_state *ios, u64 length, struct _striping_info *si) { unsigned stripe_unit = ios->layout->stripe_unit; @@ -400,7 +387,7 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length, int ret = 0; while (length) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; + struct ore_per_dev_state *per_dev = &ios->per_dev[dev]; unsigned cur_len, page_off = 0; if (!per_dev->length) { @@ -443,7 +430,7 @@ out: return ret; } -static int _prepare_for_striping(struct exofs_io_state *ios) +static int _prepare_for_striping(struct ore_io_state *ios) { u64 length = ios->length; u64 offset = ios->offset; @@ -452,9 +439,9 @@ static int _prepare_for_striping(struct exofs_io_state *ios) if (!ios->pages) { if (ios->kern_buff) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; + struct ore_per_dev_state *per_dev = &ios->per_dev[0]; - _calc_stripe_info(ios, ios->offset, &si); + _calc_stripe_info(ios->layout, ios->offset, &si); per_dev->offset = si.obj_offset; per_dev->dev = si.dev; @@ -468,7 +455,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios) } while (length) { - _calc_stripe_info(ios, offset, &si); + _calc_stripe_info(ios->layout, offset, &si); if (length < si.group_length) si.group_length = length; @@ -485,57 +472,59 @@ out: return ret; } -int exofs_sbi_create(struct exofs_io_state *ios) +int ore_create(struct ore_io_state *ios) { int i, ret; - for (i = 0; i < ios->layout->s_numdevs; i++) { + for (i = 0; i < ios->comps->numdevs; i++) { struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); + ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; goto out; } ios->per_dev[i].or = or; ios->numdevs++; - osd_req_create_object(or, &ios->obj); + osd_req_create_object(or, _ios_obj(ios, i)); } - ret = exofs_io_execute(ios); + ret = ore_io_execute(ios); out: return ret; } +EXPORT_SYMBOL(ore_create); -int exofs_sbi_remove(struct exofs_io_state *ios) +int ore_remove(struct ore_io_state *ios) { int i, ret; - for (i = 0; i < ios->layout->s_numdevs; i++) { + for (i = 0; i < ios->comps->numdevs; i++) { struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); + ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; goto out; } ios->per_dev[i].or = or; ios->numdevs++; - osd_req_remove_object(or, &ios->obj); + osd_req_remove_object(or, _ios_obj(ios, i)); } - ret = exofs_io_execute(ios); + ret = ore_io_execute(ios); out: return ret; } +EXPORT_SYMBOL(ore_remove); -static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) +static int _write_mirror(struct ore_io_state *ios, int cur_comp) { - struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; + struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp]; unsigned dev = ios->per_dev[cur_comp].dev; unsigned last_comp = cur_comp + ios->layout->mirrors_p1; int ret = 0; @@ -544,12 +533,12 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) return 0; /* Just an empty slot */ for (; cur_comp < last_comp; ++cur_comp, ++dev) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); + ORE_ERR("%s: osd_start_request failed\n", __func__); ret = -ENOMEM; goto out; } @@ -563,7 +552,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) bio = bio_kmalloc(GFP_KERNEL, master_dev->bio->bi_max_vecs); if (unlikely(!bio)) { - EXOFS_DBGMSG( + ORE_DBGMSG( "Failed to allocate BIO size=%u\n", master_dev->bio->bi_max_vecs); ret = -ENOMEM; @@ -582,25 +571,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) bio->bi_rw |= REQ_WRITE; } - osd_req_write(or, &ios->obj, per_dev->offset, bio, - per_dev->length); - EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " + osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, + bio, per_dev->length); + ORE_DBGMSG("write(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), _LLU(per_dev->length), dev); } else if (ios->kern_buff) { - ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, - ios->kern_buff, ios->length); + ret = osd_req_write_kern(or, _ios_obj(ios, dev), + per_dev->offset, + ios->kern_buff, ios->length); if (unlikely(ret)) goto out; - EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " + ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(_ios_obj(ios, dev)->id), + _LLU(per_dev->offset), _LLU(ios->length), dev); } else { - osd_req_set_attributes(or, &ios->obj); - EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->out_attr_len, dev); + osd_req_set_attributes(or, _ios_obj(ios, dev)); + ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", + _LLU(_ios_obj(ios, dev)->id), + ios->out_attr_len, dev); } if (ios->out_attr) @@ -616,7 +609,7 @@ out: return ret; } -int exofs_sbi_write(struct exofs_io_state *ios) +int ore_write(struct ore_io_state *ios) { int i; int ret; @@ -626,52 +619,55 @@ int exofs_sbi_write(struct exofs_io_state *ios) return ret; for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { - ret = _sbi_write_mirror(ios, i); + ret = _write_mirror(ios, i); if (unlikely(ret)) return ret; } - ret = exofs_io_execute(ios); + ret = ore_io_execute(ios); return ret; } +EXPORT_SYMBOL(ore_write); -static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) +static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) { struct osd_request *or; - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; - unsigned first_dev = (unsigned)ios->obj.id; + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct osd_obj_id *obj = _ios_obj(ios, cur_comp); + unsigned first_dev = (unsigned)obj->id; if (ios->pages && !per_dev->length) return 0; /* Just an empty slot */ first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; - or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); + ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; } per_dev->or = or; if (ios->pages) { - osd_req_read(or, &ios->obj, per_dev->offset, + osd_req_read(or, obj, per_dev->offset, per_dev->bio, per_dev->length); - EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" - " dev=%d\n", _LLU(ios->obj.id), + ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" + " dev=%d\n", _LLU(obj->id), _LLU(per_dev->offset), _LLU(per_dev->length), first_dev); } else if (ios->kern_buff) { - int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, + int ret = osd_req_read_kern(or, obj, per_dev->offset, ios->kern_buff, ios->length); - EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " "length=0x%llx dev=%d ret=>%d\n", - _LLU(ios->obj.id), _LLU(per_dev->offset), + _LLU(obj->id), _LLU(per_dev->offset), _LLU(ios->length), first_dev, ret); if (unlikely(ret)) return ret; } else { - osd_req_get_attributes(or, &ios->obj); - EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", - _LLU(ios->obj.id), ios->in_attr_len, first_dev); + osd_req_get_attributes(or, obj); + ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", + _LLU(obj->id), + ios->in_attr_len, first_dev); } if (ios->out_attr) osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); @@ -682,7 +678,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) return 0; } -int exofs_sbi_read(struct exofs_io_state *ios) +int ore_read(struct ore_io_state *ios) { int i; int ret; @@ -692,16 +688,17 @@ int exofs_sbi_read(struct exofs_io_state *ios) return ret; for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { - ret = _sbi_read_mirror(ios, i); + ret = _read_mirror(ios, i); if (unlikely(ret)) return ret; } - ret = exofs_io_execute(ios); + ret = ore_io_execute(ios); return ret; } +EXPORT_SYMBOL(ore_read); -int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) +int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) { struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ void *iter = NULL; @@ -721,83 +718,118 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) return -EIO; } +EXPORT_SYMBOL(extract_attr_from_ios); -static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, +static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, struct osd_attr *attr) { int last_comp = cur_comp + ios->layout->mirrors_p1; for (; cur_comp < last_comp; ++cur_comp) { - struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; + struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; struct osd_request *or; - or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); + or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); if (unlikely(!or)) { - EXOFS_ERR("%s: osd_start_request failed\n", __func__); + ORE_ERR("%s: osd_start_request failed\n", __func__); return -ENOMEM; } per_dev->or = or; - osd_req_set_attributes(or, &ios->obj); + osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); osd_req_add_set_attr_list(or, attr, 1); } return 0; } -int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) +struct _trunc_info { + struct _striping_info si; + u64 prev_group_obj_off; + u64 next_group_obj_off; + + unsigned first_group_dev; + unsigned nex_group_dev; + unsigned max_devs; +}; + +void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, + struct _trunc_info *ti) +{ + unsigned stripe_unit = layout->stripe_unit; + + _calc_stripe_info(layout, file_offset, &ti->si); + + ti->prev_group_obj_off = ti->si.M * stripe_unit; + ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; + + ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); + ti->nex_group_dev = ti->first_group_dev + layout->group_width; + ti->max_devs = layout->group_width * layout->group_count; +} + +int ore_truncate(struct ore_layout *layout, struct ore_components *comps, + u64 size) { - struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct exofs_trunc_attr { struct osd_attr attr; __be64 newsize; } *size_attrs; - struct _striping_info si; + struct _trunc_info ti; int i, ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(layout, comps, &ios); if (unlikely(ret)) return ret; - size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), + _calc_trunk_info(ios->layout, size, &ti); + + size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs), GFP_KERNEL); if (unlikely(!size_attrs)) { ret = -ENOMEM; goto out; } - ios->obj.id = exofs_oi_objno(oi); - ios->cred = oi->i_cred; + ios->numdevs = ios->comps->numdevs; - ios->numdevs = ios->layout->s_numdevs; - _calc_stripe_info(ios, size, &si); - - for (i = 0; i < ios->layout->group_width; ++i) { + for (i = 0; i < ti.max_devs; ++i) { struct exofs_trunc_attr *size_attr = &size_attrs[i]; u64 obj_size; - if (i < si.dev) - obj_size = si.obj_offset + - ios->layout->stripe_unit - si.unit_off; - else if (i == si.dev) - obj_size = si.obj_offset; - else /* i > si.dev */ - obj_size = si.obj_offset - si.unit_off; + if (i < ti.first_group_dev) + obj_size = ti.prev_group_obj_off; + else if (i >= ti.nex_group_dev) + obj_size = ti.next_group_obj_off; + else if (i < ti.si.dev) /* dev within this group */ + obj_size = ti.si.obj_offset + + ios->layout->stripe_unit - ti.si.unit_off; + else if (i == ti.si.dev) + obj_size = ti.si.obj_offset; + else /* i > ti.dev */ + obj_size = ti.si.obj_offset - ti.si.unit_off; size_attr->newsize = cpu_to_be64(obj_size); size_attr->attr = g_attr_logical_length; size_attr->attr.val_ptr = &size_attr->newsize; + ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", + _LLU(comps->comps->obj.id), _LLU(obj_size), i); ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, &size_attr->attr); if (unlikely(ret)) goto out; } - ret = exofs_io_execute(ios); + ret = ore_io_execute(ios); out: kfree(size_attrs); - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } +EXPORT_SYMBOL(ore_truncate); + +const struct osd_attr g_attr_logical_length = ATTR_DEF( + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); +EXPORT_SYMBOL(g_attr_logical_length); diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h deleted file mode 100644 index c52e9888b8ab..000000000000 --- a/fs/exofs/pnfs.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2008, 2009 - * Boaz Harrosh <bharrosh@panasas.com> - * - * This file is part of exofs. - * - * exofs is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License version 2 as published by the Free - * Software Foundation. - * - */ - -/* FIXME: Remove this file once pnfs hits mainline */ - -#ifndef __EXOFS_PNFS_H__ -#define __EXOFS_PNFS_H__ - -#if ! defined(__PNFS_OSD_XDR_H__) - -enum pnfs_iomode { - IOMODE_READ = 1, - IOMODE_RW = 2, - IOMODE_ANY = 3, -}; - -/* Layout Structure */ -enum pnfs_osd_raid_algorithm4 { - PNFS_OSD_RAID_0 = 1, - PNFS_OSD_RAID_4 = 2, - PNFS_OSD_RAID_5 = 3, - PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ -}; - -struct pnfs_osd_data_map { - u32 odm_num_comps; - u64 odm_stripe_unit; - u32 odm_group_width; - u32 odm_group_depth; - u32 odm_mirror_cnt; - u32 odm_raid_algorithm; -}; - -#endif /* ! defined(__PNFS_OSD_XDR_H__) */ - -#endif /* __EXOFS_PNFS_H__ */ diff --git a/fs/exofs/super.c b/fs/exofs/super.c index c57beddcc217..274894053b02 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -40,6 +40,8 @@ #include "exofs.h" +#define EXOFS_DBGMSG2(M...) do {} while (0) + /****************************************************************************** * MOUNT OPTIONS *****************************************************************************/ @@ -208,10 +210,48 @@ static void destroy_inodecache(void) } /****************************************************************************** - * SUPERBLOCK FUNCTIONS + * Some osd helpers *****************************************************************************/ -static const struct super_operations exofs_sops; -static const struct export_operations exofs_export_ops; +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) +{ + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); +} + +static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, + u64 offset, void *p, unsigned length) +{ + struct osd_request *or = osd_start_request(od, GFP_KERNEL); +/* struct osd_sense_info osi = {.key = 0};*/ + int ret; + + if (unlikely(!or)) { + EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); + return -ENOMEM; + } + ret = osd_req_read_kern(or, obj, offset, p, length); + if (unlikely(ret)) { + EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); + goto out; + } + + ret = osd_finalize_request(or, 0, cred, NULL); + if (unlikely(ret)) { + EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); + goto out; + } + + ret = osd_execute_request(or); + if (unlikely(ret)) + EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); + /* osd_req_decode_sense(or, ret); */ + +out: + osd_end_request(or); + EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " + "length=0x%llx dev=%p ret=>%d\n", + _LLU(obj->id), _LLU(offset), _LLU(length), od, ret); + return ret; +} static const struct osd_attr g_attr_sb_stats = ATTR_DEF( EXOFS_APAGE_SB_DATA, @@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) struct osd_attr attrs[] = { [0] = g_attr_sb_stats, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } - ios->cred = sbi->s_cred; - ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) { EXOFS_ERR("Error reading super_block stats => %d\n", ret); goto out; @@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) } out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } -static void stats_done(struct exofs_io_state *ios, void *p) +static void stats_done(struct ore_io_state *ios, void *p) { - exofs_put_io_state(ios); + ore_put_io_state(ios); /* Good thanks nothing to do anymore */ } @@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) struct osd_attr attrs[] = { [0] = g_attr_sb_stats, }; - struct exofs_io_state *ios; + struct ore_io_state *ios; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); + EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); return ret; } @@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); attrs[0].val_ptr = &sbi->s_ess; - ios->cred = sbi->s_cred; + ios->done = stats_done; ios->private = sbi; ios->out_attr = attrs; ios->out_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (unlikely(ret)) { - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); - exofs_put_io_state(ios); + EXOFS_ERR("%s: ore_write failed.\n", __func__); + ore_put_io_state(ios); } return ret; } +/****************************************************************************** + * SUPERBLOCK FUNCTIONS + *****************************************************************************/ +static const struct super_operations exofs_sops; +static const struct export_operations exofs_export_ops; + /* * Write the superblock to the OSD */ @@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) { struct exofs_sb_info *sbi; struct exofs_fscb *fscb; - struct exofs_io_state *ios; + struct ore_comp one_comp; + struct ore_components comps; + struct ore_io_state *ios; int ret = -ENOMEM; fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); @@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait) * version). Otherwise the exofs_fscb is read-only from mkfs time. All * the writeable info is set in exofs_sbi_write_stats() above. */ - ret = exofs_get_io_state(&sbi->layout, &ios); + + exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID); + + ret = ore_get_io_state(&sbi->layout, &comps, &ios); if (unlikely(ret)) goto out; @@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait) fscb->s_newfs = 0; fscb->s_version = EXOFS_FSCB_VER; - ios->obj.id = EXOFS_SUPER_ID; ios->offset = 0; ios->kern_buff = fscb; - ios->cred = sbi->s_cred; - ret = exofs_sbi_write(ios); + ret = ore_write(ios); if (unlikely(ret)) - EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); + EXOFS_ERR("%s: ore_write failed.\n", __func__); else sb->s_dirt = 0; @@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait) unlock_super(sb); out: EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); - exofs_put_io_state(ios); + ore_put_io_state(ios); kfree(fscb); return ret; } @@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path, void exofs_free_sbi(struct exofs_sb_info *sbi) { - while (sbi->layout.s_numdevs) { - int i = --sbi->layout.s_numdevs; - struct osd_dev *od = sbi->layout.s_ods[i]; + while (sbi->comps.numdevs) { + int i = --sbi->comps.numdevs; + struct osd_dev *od = sbi->comps.ods[i]; if (od) { - sbi->layout.s_ods[i] = NULL; + sbi->comps.ods[i] = NULL; osduld_put_device(od); } } + if (sbi->comps.ods != sbi->_min_one_dev) + kfree(sbi->comps.ods); kfree(sbi); } @@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb) msecs_to_jiffies(100)); } - _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], - sbi->layout.s_pid); + _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0], + sbi->one_comp.obj.partition); bdi_destroy(&sbi->bdi); exofs_free_sbi(sbi); @@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, return -EINVAL; } + EXOFS_DBGMSG("exofs: layout: " + "num_comps=%u stripe_unit=0x%x group_width=%u " + "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n", + numdevs, + sbi->layout.stripe_unit, + sbi->layout.group_width, + _LLU(sbi->layout.group_depth), + sbi->layout.mirrors_p1, + sbi->data_map.odm_raid_algorithm); return 0; } -static unsigned __ra_pages(struct exofs_layout *layout) +static unsigned __ra_pages(struct ore_layout *layout) { const unsigned _MIN_RA = 32; /* min 128K read-ahead */ unsigned ra_pages = layout->group_width * layout->stripe_unit / @@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, return !(odi->systemid_len || odi->osdname_len); } -static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, +static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, + struct osd_dev *fscb_od, unsigned table_count) { - struct exofs_sb_info *sbi = *psbi; - struct osd_dev *fscb_od; - struct osd_obj_id obj = {.partition = sbi->layout.s_pid, - .id = EXOFS_DEVTABLE_ID}; + struct ore_comp comp; struct exofs_device_table *dt; unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + sizeof(*dt); @@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, return -ENOMEM; } - fscb_od = sbi->layout.s_ods[0]; - sbi->layout.s_ods[0] = NULL; - sbi->layout.s_numdevs = 0; - ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); + sbi->comps.numdevs = 0; + + comp.obj.partition = sbi->one_comp.obj.partition; + comp.obj.id = EXOFS_DEVTABLE_ID; + exofs_make_credential(comp.cred, &comp.obj); + + ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt, + table_bytes); if (unlikely(ret)) { EXOFS_ERR("ERROR: reading device table\n"); goto out; @@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, goto out; if (likely(numdevs > 1)) { - unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); + unsigned size = numdevs * sizeof(sbi->comps.ods[0]); - sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); - if (unlikely(!sbi)) { + /* Twice bigger table: See exofs_init_comps() and below + * comment + */ + sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL); + if (unlikely(!sbi->comps.ods)) { + EXOFS_ERR("ERROR: faild allocating Device array[%d]\n", + numdevs); ret = -ENOMEM; goto out; } - memset(&sbi->layout.s_ods[1], 0, - size - sizeof(sbi->layout.s_ods[0])); - *psbi = sbi; } for (i = 0; i < numdevs; i++) { @@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, * line. We always keep them in device-table order. */ if (fscb_od && osduld_device_same(fscb_od, &odi)) { - sbi->layout.s_ods[i] = fscb_od; - ++sbi->layout.s_numdevs; + sbi->comps.ods[i] = fscb_od; + ++sbi->comps.numdevs; fscb_od = NULL; continue; } @@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, goto out; } - sbi->layout.s_ods[i] = od; - ++sbi->layout.s_numdevs; + sbi->comps.ods[i] = od; + ++sbi->comps.numdevs; /* Read the fscb of the other devices to make sure the FS * partition is there. */ - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); if (unlikely(ret)) { EXOFS_ERR("ERROR: Malformed participating device " @@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, out: kfree(dt); - if (unlikely(!ret && fscb_od)) { - EXOFS_ERR( - "ERROR: Bad device-table container device not present\n"); - osduld_put_device(fscb_od); - ret = -EINVAL; - } + if (likely(!ret)) { + unsigned numdevs = sbi->comps.numdevs; + if (unlikely(fscb_od)) { + EXOFS_ERR("ERROR: Bad device-table container device not present\n"); + osduld_put_device(fscb_od); + return -EINVAL; + } + /* exofs round-robins the device table view according to inode + * number. We hold a: twice bigger table hence inodes can point + * to any device and have a sequential view of the table + * starting at this device. See exofs_init_comps() + */ + for (i = 0; i < numdevs - 1; ++i) + sbi->comps.ods[i + numdevs] = sbi->comps.ods[i]; + } return ret; } @@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ - struct osd_obj_id obj; + struct ore_comp comp; unsigned table_count; int ret; @@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; - ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); - if (ret) - goto free_bdi; - /* use mount options to fill superblock */ if (opts->is_osdname) { struct osd_dev_info odi = {.systemid_len = 0}; @@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) odi.osdname_len = strlen(opts->dev_name); odi.osdname = (u8 *)opts->dev_name; od = osduld_info_lookup(&odi); + kfree(opts->dev_name); + opts->dev_name = NULL; } else { od = osduld_path_lookup(opts->dev_name); } @@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sbi->layout.group_width = 1; sbi->layout.group_depth = -1; sbi->layout.group_count = 1; - sbi->layout.s_ods[0] = od; - sbi->layout.s_numdevs = 1; - sbi->layout.s_pid = opts->pid; sbi->s_timeout = opts->timeout; + sbi->one_comp.obj.partition = opts->pid; + sbi->one_comp.obj.id = 0; + exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); + sbi->comps.numdevs = 1; + sbi->comps.single_comp = EC_SINGLE_COMP; + sbi->comps.comps = &sbi->one_comp; + sbi->comps.ods = sbi->_min_one_dev; + /* fill in some other data by hand */ memset(sb->s_id, 0, sizeof(sb->s_id)); strcpy(sb->s_id, "exofs"); @@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) sb->s_bdev = NULL; sb->s_dev = 0; - obj.partition = sbi->layout.s_pid; - obj.id = EXOFS_SUPER_ID; - exofs_make_credential(sbi->s_cred, &obj); + comp.obj.partition = sbi->one_comp.obj.partition; + comp.obj.id = EXOFS_SUPER_ID; + exofs_make_credential(comp.cred, &comp.obj); - ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); + ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb)); if (unlikely(ret)) goto free_sbi; @@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) table_count = le64_to_cpu(fscb.s_dev_table_count); if (table_count) { - ret = exofs_read_lookup_dev_table(&sbi, table_count); + ret = exofs_read_lookup_dev_table(sbi, od, table_count); if (unlikely(ret)) goto free_sbi; + } else { + sbi->comps.ods[0] = od; } __sbi_read_stats(sbi); @@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], - sbi->layout.s_pid); - if (opts->is_osdname) - kfree(opts->dev_name); + ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); + if (ret) { + EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); + goto free_sbi; + } + + _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0], + sbi->one_comp.obj.partition); return 0; free_sbi: - bdi_destroy(&sbi->bdi); -free_bdi: EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", - opts->dev_name, sbi->layout.s_pid, ret); + opts->dev_name, sbi->one_comp.obj.partition, ret); exofs_free_sbi(sbi); - if (opts->is_osdname) - kfree(opts->dev_name); return ret; } @@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct exofs_sb_info *sbi = sb->s_fs_info; - struct exofs_io_state *ios; + struct ore_io_state *ios; struct osd_attr attrs[] = { ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), @@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) }; uint64_t capacity = ULLONG_MAX; uint64_t used = ULLONG_MAX; - uint8_t cred_a[OSD_CAP_LEN]; int ret; - ret = exofs_get_io_state(&sbi->layout, &ios); + ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios); if (ret) { - EXOFS_DBGMSG("exofs_get_io_state failed.\n"); + EXOFS_DBGMSG("ore_get_io_state failed.\n"); return ret; } - exofs_make_credential(cred_a, &ios->obj); - ios->cred = sbi->s_cred; ios->in_attr = attrs; ios->in_attr_len = ARRAY_SIZE(attrs); - ret = exofs_sbi_read(ios); + ret = ore_read(ios); if (unlikely(ret)) goto out; @@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = EXOFS_NAME_LEN; out: - exofs_put_io_state(ios); + ore_put_io_state(ios); return ret; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 6e18a0b7750d..5571708b6a58 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2209,9 +2209,11 @@ static int ext3_symlink (struct inode * dir, /* * For non-fast symlinks, we just allocate inode and put it on * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks. + * group descriptor, sb, inode block, quota blocks, and + * possibly selinux xattr blocks. */ - credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT3_XATTR_TRANS_BLOCKS; } else { /* * Fast symlink. We have to add entry to directory diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 565a154e22d4..f8068c7bae9f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2253,9 +2253,11 @@ static int ext4_symlink(struct inode *dir, /* * For non-fast symlinks, we just allocate inode and put it on * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks. + * group descriptor, sb, inode block, quota blocks, and + * possibly selinux xattr blocks. */ - credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT4_XATTR_TRANS_BLOCKS; } else { /* * Fast symlink. We have to add entry to directory diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ad64732cbce..5efbd5d7701a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, struct super_block *sb = dir->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ - struct msdos_dir_entry *de; + struct msdos_dir_entry *uninitialized_var(de); int err, free_slots, i, nr_bhs; loff_t pos, i_pos; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5942fec22c65..1726d7303047 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, out: /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { - fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" + fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" " for FAT filesystems, filesystem will be " - "case sensitive!\n"); + "case sensitive!"); } /* If user doesn't specify allow_utime, it's initialized from dmask. */ @@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->free_clusters = -1; /* Don't know yet */ sbi->free_clus_valid = 0; sbi->prev_free = FAT_START_ENT; + sb->s_maxbytes = 0xffffffff; if (!sbi->fat_length && b->fat32_length) { struct fat_boot_fsinfo *fsinfo; @@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_length = le32_to_cpu(b->fat32_length); sbi->root_cluster = le32_to_cpu(b->root_cluster); - sb->s_maxbytes = 0xffffffff; - /* MC - if info_sector is 0, don't multiply by 0 */ sbi->fsinfo_sector = le16_to_cpu(b->info_sector); if (sbi->fsinfo_sector == 0) diff --git a/fs/inode.c b/fs/inode.c index 5aab80dc008c..73920d555c88 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; + inode->i_opflags = 0; inode->i_uid = 0; inode->i_gid = 0; atomic_set(&inode->i_writecount, 0); diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d3b603..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) /* * Wait for outstanding transactions to be written to log: */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * close fileset inode allocation map (aka fileset inode) @@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) * * remove file system from log active file system list. */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * Make sure all metadata makes it to disk diff --git a/fs/namei.c b/fs/namei.c index 445fd5da11fa..2826db35dc25 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask) #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; - /* - * Under RCU walk, we cannot even do a "get_cached_acl()", - * because that involves locking and getting a refcount on - * a cached ACL. - * - * So the only case we handle during RCU walking is the - * case of a cached "no ACL at all", which needs no locks - * or refcounts. - */ if (mask & MAY_NOT_BLOCK) { - if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) + acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); + if (!acl) return -EAGAIN; - return -ECHILD; + /* no ->get_acl() calls in RCU mode... */ + if (acl == ACL_NOT_CACHED) + return -ECHILD; + return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); } acl = get_cached_acl(inode, ACL_TYPE_ACCESS); @@ -313,6 +308,26 @@ int generic_permission(struct inode *inode, int mask) return -EACCES; } +/* + * We _really_ want to just do "generic_permission()" without + * even looking at the inode->i_op values. So we keep a cache + * flag in inode->i_opflags, that says "this has not special + * permission function, use the fast case". + */ +static inline int do_inode_permission(struct inode *inode, int mask) +{ + if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { + if (likely(inode->i_op->permission)) + return inode->i_op->permission(inode, mask); + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_FASTPERM; + spin_unlock(&inode->i_lock); + } + return generic_permission(inode, mask); +} + /** * inode_permission - check for access rights to a given inode * @inode: inode to check permission on @@ -327,7 +342,7 @@ int inode_permission(struct inode *inode, int mask) { int retval; - if (mask & MAY_WRITE) { + if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* @@ -344,11 +359,7 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - if (inode->i_op->permission) - retval = inode->i_op->permission(inode, mask); - else - retval = generic_permission(inode, mask); - + retval = do_inode_permission(inode, mask); if (retval) return retval; @@ -1250,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd) } } +/* + * Do we need to follow links? We _really_ want to be able + * to do this check without having to look at inode->i_op, + * so we keep a cache of "no, this doesn't need follow_link" + * for the common case. + */ +static inline int should_follow_link(struct inode *inode, int follow) +{ + if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (likely(inode->i_op->follow_link)) + return follow; + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_NOFOLLOW; + spin_unlock(&inode->i_lock); + } + return 0; +} + static inline int walk_component(struct nameidata *nd, struct path *path, struct qstr *name, int type, int follow) { @@ -1272,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, terminate_walk(nd); return -ENOENT; } - if (unlikely(inode->i_op->follow_link) && follow) { + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { terminate_walk(nd); @@ -1325,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* + * We really don't want to look at inode->i_op->lookup + * when we don't have to. So we keep a cache bit in + * the inode ->i_opflags field that says "yes, we can + * do lookup on this inode". + */ +static inline int can_lookup(struct inode *inode) +{ + if (likely(inode->i_opflags & IOP_LOOKUP)) + return 1; + if (likely(!inode->i_op->lookup)) + return 0; + + /* We do this once for the lifetime of the inode */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_LOOKUP; + spin_unlock(&inode->i_lock); + return 1; +} + +/* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. @@ -1403,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } + if (can_lookup(nd->inode)) + continue; err = -ENOTDIR; - if (!nd->inode->i_op->lookup) - break; - continue; + break; /* here ends the main loop */ last_component: diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index be020771c6b4..dbcd82126aed 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -79,12 +79,9 @@ config NFS_V4_1 depends on NFS_FS && NFS_V4 && EXPERIMENTAL select SUNRPC_BACKCHANNEL select PNFS_FILE_LAYOUT - select PNFS_BLOCK - select MD - select BLK_DEV_DM help This option enables support for minor version 1 of the NFSv4 protocol - (RFC 5661 and RFC 5663) in the kernel's NFS client. + (RFC 5661) in the kernel's NFS client. If unsure, say N. @@ -93,16 +90,13 @@ config PNFS_FILE_LAYOUT config PNFS_BLOCK tristate + depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM + default m config PNFS_OBJLAYOUT - tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" + tristate depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD - help - Say M here if you want your pNFS client to support the Objects Layout Driver. - Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and - upper level driver (SCSI_OSD_ULD). - - If unsure, say N. + default m config ROOT_NFS bool "Root file system on NFS" diff --git a/fs/proc/base.c b/fs/proc/base.c index 08e3eccf9a12..5eb02069e1b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. */ - WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); task->signal->oom_adj = oom_adjust; @@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { + unsigned int f_flags; + struct fdtable *fdt; + + fdt = files_fdtable(files); + f_flags = file->f_flags & ~O_CLOEXEC; + if (FD_ISSET(fd, fdt->close_on_exec)) + f_flags |= O_CLOEXEC; + if (path) { *path = file->f_path; path_get(&file->f_path); @@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) "pos:\t%lli\n" "flags:\t0%o\n", (long long) file->f_pos, - file->f_flags); + f_flags); spin_unlock(&files->file_lock); put_files_struct(files); return 0; diff --git a/fs/stat.c b/fs/stat.c index 961039121cb8..ba5316ffac61 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->uid = inode->i_uid; stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; + stat->size = i_size_read(inode); stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->size = i_size_read(inode); - stat->blocks = inode->i_blocks; stat->blksize = (1 << inode->i_blkbits); + stat->blocks = inode->i_blocks; } EXPORT_SYMBOL(generic_fillattr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index d1fe74506c4c..c57836dc778f 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -596,7 +596,7 @@ _xfs_buf_read( bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); status = xfs_buf_iorequest(bp); - if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) + if (status || bp->b_error || (flags & XBF_ASYNC)) return status; return xfs_buf_iowait(bp); } @@ -679,7 +679,6 @@ xfs_buf_read_uncached( /* set up the buffer for a read IO */ XFS_BUF_SET_ADDR(bp, daddr); XFS_BUF_READ(bp); - XFS_BUF_BUSY(bp); xfsbdstrat(mp, bp); error = xfs_buf_iowait(bp); @@ -1069,7 +1068,7 @@ xfs_bioerror( /* * No need to wait until the buffer is unpinned, we aren't flushing it. */ - XFS_BUF_ERROR(bp, EIO); + xfs_buf_ioerror(bp, EIO); /* * We're calling xfs_buf_ioend, so delete XBF_DONE flag. @@ -1094,7 +1093,7 @@ STATIC int xfs_bioerror_relse( struct xfs_buf *bp) { - int64_t fl = XFS_BUF_BFLAGS(bp); + int64_t fl = bp->b_flags; /* * No need to wait until the buffer is unpinned. * We aren't flushing it. @@ -1115,7 +1114,7 @@ xfs_bioerror_relse( * There's no reason to mark error for * ASYNC buffers. */ - XFS_BUF_ERROR(bp, EIO); + xfs_buf_ioerror(bp, EIO); XFS_BUF_FINISH_IOWAIT(bp); } else { xfs_buf_relse(bp); @@ -1324,7 +1323,7 @@ xfs_buf_offset( struct page *page; if (bp->b_flags & XBF_MAPPED) - return XFS_BUF_PTR(bp) + offset; + return bp->b_addr + offset; offset += bp->b_offset; page = bp->b_pages[offset >> PAGE_SHIFT]; @@ -1484,7 +1483,7 @@ xfs_setsize_buftarg_flags( if (set_blocksize(btp->bt_bdev, sectorsize)) { xfs_warn(btp->bt_mount, "Cannot set_blocksize to %u on device %s\n", - sectorsize, XFS_BUFTARG_NAME(btp)); + sectorsize, xfs_buf_target_name(btp)); return EINVAL; } @@ -1681,7 +1680,7 @@ xfs_buf_delwri_split( list_for_each_entry_safe(bp, n, dwq, b_list) { ASSERT(bp->b_flags & XBF_DELWRI); - if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) { + if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { if (!force && time_before(jiffies, bp->b_queuetime + age)) { xfs_buf_unlock(bp); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 6a83b46b4bcf..620972b8094d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -228,11 +228,15 @@ extern void xfs_buf_delwri_promote(xfs_buf_t *); extern int xfs_buf_init(void); extern void xfs_buf_terminate(void); -#define xfs_buf_target_name(target) \ - ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; }) +static inline const char * +xfs_buf_target_name(struct xfs_buftarg *target) +{ + static char __b[BDEVNAME_SIZE]; + + return bdevname(target->bt_bdev, __b); +} -#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) #define XFS_BUF_ZEROFLAGS(bp) \ ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) @@ -251,23 +255,14 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) -#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no) -#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp) -#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0) - #define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) #define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) #define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) -#define XFS_BUF_BUSY(bp) do { } while (0) -#define XFS_BUF_UNBUSY(bp) do { } while (0) -#define XFS_BUF_ISBUSY(bp) (1) - #define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) #define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) #define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) -#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) #define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) @@ -276,10 +271,6 @@ void xfs_buf_stale(struct xfs_buf *bp); #define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) #define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) -#define XFS_BUF_SET_START(bp) do { } while (0) - -#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) -#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) #define XFS_BUF_ADDR(bp) ((bp)->b_bn) #define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) #define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) @@ -299,14 +290,13 @@ xfs_buf_set_ref( #define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) #define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) -#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) +static inline int xfs_buf_ispinned(struct xfs_buf *bp) +{ + return atomic_read(&bp->b_pin_count); +} #define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); -#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) -#define XFS_BUF_TARGET(bp) ((bp)->b_target) -#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) - static inline void xfs_buf_relse(xfs_buf_t *bp) { xfs_buf_unlock(bp); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index e4c938afb910..4604f90f86a3 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -332,7 +332,7 @@ xfs_sync_fsdata( * between there and here. */ bp = xfs_getsb(mp, 0); - if (XFS_BUF_ISPINNED(bp)) + if (xfs_buf_ispinned(bp)) xfs_log_force(mp, 0); return xfs_bwrite(mp, bp); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 837f31158d43..db62959bed13 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -318,10 +318,9 @@ xfs_qm_init_dquot_blk( int curid, i; ASSERT(tp); - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(xfs_buf_islocked(bp)); - d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); + d = bp->b_addr; /* * ID of the first dquot in the block - id's are zero based. @@ -403,7 +402,7 @@ xfs_qm_dqalloc( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); - if (!bp || (error = XFS_BUF_GETERROR(bp))) + if (!bp || (error = xfs_buf_geterror(bp))) goto error1; /* * Make a chunk of dquots out of this buffer and log @@ -534,13 +533,12 @@ xfs_qm_dqtobp( return XFS_ERROR(error); } - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(xfs_buf_islocked(bp)); /* * calculate the location of the dquot inside the buffer. */ - ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); + ddq = bp->b_addr + dqp->q_bufoffset; /* * A simple sanity check in case we got a corrupted dquot... @@ -553,7 +551,6 @@ xfs_qm_dqtobp( xfs_trans_brelse(tp, bp); return XFS_ERROR(EIO); } - XFS_BUF_BUSY(bp); /* We dirtied this */ } *O_bpp = bp; @@ -622,7 +619,6 @@ xfs_qm_dqread( * this particular dquot was repaired. We still aren't afraid to * brelse it because we have the changes incore. */ - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(xfs_buf_islocked(bp)); xfs_trans_brelse(tp, bp); @@ -1204,7 +1200,7 @@ xfs_qm_dqflush( /* * Calculate the location of the dquot inside the buffer. */ - ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); + ddqp = bp->b_addr + dqp->q_bufoffset; /* * A simple sanity check in case we got a corrupted dquot.. @@ -1240,7 +1236,7 @@ xfs_qm_dqflush( * If the buffer is pinned then push on the log so we won't * get stuck waiting in the write for too long. */ - if (XFS_BUF_ISPINNED(bp)) { + if (xfs_buf_ispinned(bp)) { trace_xfs_dqflush_force(dqp); xfs_log_force(mp, 0); } @@ -1447,7 +1443,7 @@ xfs_qm_dqflock_pushbuf_wait( goto out_lock; if (XFS_BUF_ISDELAYWRITE(bp)) { - if (XFS_BUF_ISPINNED(bp)) + if (xfs_buf_ispinned(bp)) xfs_log_force(mp, 0); xfs_buf_delwri_promote(bp); wake_up_process(bp->b_target->bt_task); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 46e54ad9a2dc..9a0aa76facdf 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1240,7 +1240,7 @@ xfs_qm_reset_dqcounts( do_div(j, sizeof(xfs_dqblk_t)); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif - ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); + ddq = bp->b_addr; for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { /* * Do a sanity check, and if needed, repair the dqblk. Don't diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 6530769a999b..4805f009f923 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -103,7 +103,7 @@ typedef struct xfs_agf { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) -#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr)) extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); @@ -156,7 +156,7 @@ typedef struct xfs_agi { /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) -#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr)) extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); @@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, #define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) #define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp)) #define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t)) -#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr)) typedef struct xfs_agfl { __be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 1e00b3ef6274..bdd9cb54d63b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -451,8 +451,7 @@ xfs_alloc_read_agfl( XFS_FSS_TO_BB(mp, 1), 0, &bp); if (error) return error; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); *bpp = bp; return 0; @@ -2116,7 +2115,7 @@ xfs_read_agf( if (!*bpp) return 0; - ASSERT(!XFS_BUF_GETERROR(*bpp)); + ASSERT(!(*bpp)->b_error); agf = XFS_BUF_TO_AGF(*bpp); /* @@ -2168,7 +2167,7 @@ xfs_alloc_read_agf( return error; if (!*bpp) return 0; - ASSERT(!XFS_BUF_GETERROR(*bpp)); + ASSERT(!(*bpp)->b_error); agf = XFS_BUF_TO_AGF(*bpp); pag = xfs_perag_get(mp, agno); diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index cbae424fe1ba..160bcdc34a6e 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -2121,8 +2121,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, XBF_LOCK | XBF_DONT_BLOCK); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index ab3e5c6c4642..452a291383ab 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3383,8 +3383,7 @@ xfs_bmap_local_to_extents( ASSERT(args.len == 1); *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data, - ifp->if_bytes); + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index cabf4b5604aa..2b9fd385e27d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -275,8 +275,7 @@ xfs_btree_dup_cursor( return error; } new->bc_bufs[i] = bp; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); } else new->bc_bufs[i] = NULL; } @@ -467,8 +466,7 @@ xfs_btree_get_bufl( ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); return bp; } @@ -491,8 +489,7 @@ xfs_btree_get_bufs( ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); return bp; } @@ -632,7 +629,7 @@ xfs_btree_read_bufl( mp->m_bsize, lock, &bp))) { return error; } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); if (bp) XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); *bpp = bp; @@ -973,8 +970,7 @@ xfs_btree_get_buf_block( *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, flags); - ASSERT(*bpp); - ASSERT(!XFS_BUF_GETERROR(*bpp)); + ASSERT(!xfs_buf_geterror(*bpp)); *block = XFS_BUF_TO_BLOCK(*bpp); return 0; @@ -1006,8 +1002,7 @@ xfs_btree_read_buf_block( if (error) return error; - ASSERT(*bpp != NULL); - ASSERT(!XFS_BUF_GETERROR(*bpp)); + ASSERT(!xfs_buf_geterror(*bpp)); xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 8d05a6a46ce3..5b240de104c0 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -262,7 +262,7 @@ typedef struct xfs_btree_cur /* * Convert from buffer to btree block header. */ -#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) /* diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 88492916c3dc..cac2ecfa6746 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -124,9 +124,9 @@ xfs_buf_item_log_check( bp = bip->bli_buf; ASSERT(XFS_BUF_COUNT(bp) > 0); - ASSERT(XFS_BUF_PTR(bp) != NULL); + ASSERT(bp->b_addr != NULL); orig = bip->bli_orig; - buffer = XFS_BUF_PTR(bp); + buffer = bp->b_addr; for (x = 0; x < XFS_BUF_COUNT(bp); x++) { if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { xfs_emerg(bp->b_mount, @@ -371,7 +371,6 @@ xfs_buf_item_pin( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - ASSERT(XFS_BUF_ISBUSY(bip->bli_buf)); ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || (bip->bli_flags & XFS_BLI_STALE)); @@ -479,13 +478,13 @@ xfs_buf_item_trylock( struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - if (XFS_BUF_ISPINNED(bp)) + if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; /* take a reference to the buffer. */ - XFS_BUF_HOLD(bp); + xfs_buf_hold(bp); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_trylock(bip); @@ -726,7 +725,7 @@ xfs_buf_item_init( * to have logged. */ bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP); - memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp)); + memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp)); bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP); #endif @@ -895,7 +894,6 @@ xfs_buf_attach_iodone( { xfs_log_item_t *head_lip; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(xfs_buf_islocked(bp)); lip->li_cb = cb; @@ -960,7 +958,7 @@ xfs_buf_iodone_callbacks( static ulong lasttime; static xfs_buftarg_t *lasttarg; - if (likely(!XFS_BUF_GETERROR(bp))) + if (likely(!xfs_buf_geterror(bp))) goto do_callbacks; /* @@ -973,14 +971,14 @@ xfs_buf_iodone_callbacks( goto do_callbacks; } - if (XFS_BUF_TARGET(bp) != lasttarg || + if (bp->b_target != lasttarg || time_after(jiffies, (lasttime + 5*HZ))) { lasttime = jiffies; xfs_alert(mp, "Device %s: metadata write error block 0x%llx", - XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), + xfs_buf_target_name(bp->b_target), (__uint64_t)XFS_BUF_ADDR(bp)); } - lasttarg = XFS_BUF_TARGET(bp); + lasttarg = bp->b_target; /* * If the write was asynchronous then no one will be looking for the @@ -991,12 +989,11 @@ xfs_buf_iodone_callbacks( * around. */ if (XFS_BUF_ISASYNC(bp)) { - XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ + xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ if (!XFS_BUF_ISSTALE(bp)) { XFS_BUF_DELAYWRITE(bp); XFS_BUF_DONE(bp); - XFS_BUF_SET_START(bp); } ASSERT(bp->b_iodone != NULL); trace_xfs_buf_item_iodone_async(bp, _RET_IP_); @@ -1013,7 +1010,6 @@ xfs_buf_iodone_callbacks( XFS_BUF_UNDELAYWRITE(bp); trace_xfs_buf_error_relse(bp, _RET_IP_); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); do_callbacks: xfs_buf_do_callbacks(bp); diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 5bfcb8779f9f..ee9d5427fcd4 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2050,7 +2050,7 @@ xfs_da_do_buf( case 0: bp = xfs_trans_get_buf(trans, mp->m_ddev_targp, mappedbno, nmapped, 0); - error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO); + error = bp ? bp->b_error : XFS_ERROR(EIO); break; case 1: case 2: @@ -2268,7 +2268,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) dabuf->nbuf = 1; bp = bps[0]; dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp)); - dabuf->data = XFS_BUF_PTR(bp); + dabuf->data = bp->b_addr; dabuf->bps[0] = bp; } else { dabuf->nbuf = nbuf; @@ -2279,7 +2279,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps) dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP); for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) { bp = bps[i]; - memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp), + memcpy((char *)dabuf->data + off, bp->b_addr, XFS_BUF_COUNT(bp)); } } @@ -2302,8 +2302,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf) for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) { bp = dabuf->bps[i]; - memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off, - XFS_BUF_COUNT(bp)); + memcpy(bp->b_addr, dabuf->data + off, + XFS_BUF_COUNT(bp)); } } } @@ -2340,7 +2340,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last) ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); if (dabuf->nbuf == 1) { - ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0])); + ASSERT(dabuf->data == dabuf->bps[0]->b_addr); xfs_trans_log_buf(tp, dabuf->bps[0], first, last); return; } diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index dffba9ba0db6..a3721633abc8 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt { be32_to_cpu((dip)->di_nextents) : \ be16_to_cpu((dip)->di_anextents)) -#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr)) /* * For block and character special files the 32bit dev_t is stored at the diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index dd5628bd8d0b..9f24ec28283b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -202,8 +202,7 @@ xfs_ialloc_inode_init( fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, XBF_LOCK); - ASSERT(fbuf); - ASSERT(!XFS_BUF_GETERROR(fbuf)); + ASSERT(!xfs_buf_geterror(fbuf)); /* * Initialize all inodes in this buffer and then log them. @@ -1486,7 +1485,7 @@ xfs_read_agi( if (error) return error; - ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); + ASSERT(!xfs_buf_geterror(*bpp)); agi = XFS_BUF_TO_AGI(*bpp); /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2fcca4b03ed3..0239a7c7c886 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2473,7 +2473,7 @@ cluster_corrupt_out: if (bp->b_iodone) { XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); - XFS_BUF_ERROR(bp,EIO); + xfs_buf_ioerror(bp, EIO); xfs_buf_ioend(bp, 0); } else { XFS_BUF_STALE(bp); @@ -2585,7 +2585,7 @@ xfs_iflush( * If the buffer is pinned then push on the log now so we won't * get stuck waiting in the write for too long. */ - if (XFS_BUF_ISPINNED(bp)) + if (xfs_buf_ispinned(bp)) xfs_log_force(mp, 0); /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 06ff8437ed8e..3a8d4f66d702 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -878,7 +878,7 @@ xlog_iodone(xfs_buf_t *bp) /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, + if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp)); XFS_BUF_STALE(bp); @@ -1051,7 +1051,6 @@ xlog_alloc_log(xfs_mount_t *mp, if (!bp) goto out_free_log; bp->b_iodone = xlog_iodone; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(xfs_buf_islocked(bp)); log->l_xbuf = bp; @@ -1108,7 +1107,6 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; - ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(xfs_buf_islocked(iclog->ic_bp)); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1248,7 +1246,7 @@ xlog_bdstrat( struct xlog_in_core *iclog = bp->b_fspriv; if (iclog->ic_state & XLOG_STATE_IOERROR) { - XFS_BUF_ERROR(bp, EIO); + xfs_buf_ioerror(bp, EIO); XFS_BUF_STALE(bp); xfs_buf_ioend(bp, 0); /* @@ -1355,7 +1353,6 @@ xlog_sync(xlog_t *log, XFS_BUF_SET_COUNT(bp, count); bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); - XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_SYNCIO; @@ -1398,16 +1395,15 @@ xlog_sync(xlog_t *log, if (split) { bp = iclog->ic_log->l_xbuf; XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ - XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ - (__psint_t)count), split); + xfs_buf_associate_memory(bp, + (char *)&iclog->ic_header + count, split); bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); - XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) bp->b_flags |= XBF_FUA; - dptr = XFS_BUF_PTR(bp); + dptr = bp->b_addr; /* * Bump the cycle numbers at the start of each block * since this part of the buffer is at the start of diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 052a2c0ec5fb..a199dbcee7d8 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -147,7 +147,7 @@ xlog_align( xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp)); - return XFS_BUF_PTR(bp) + BBTOB(offset); + return bp->b_addr + BBTOB(offset); } @@ -178,9 +178,7 @@ xlog_bread_noalign( XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); XFS_BUF_READ(bp); - XFS_BUF_BUSY(bp); XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); - XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); xfsbdstrat(log->l_mp, bp); error = xfs_buf_iowait(bp); @@ -220,18 +218,18 @@ xlog_bread_offset( xfs_buf_t *bp, xfs_caddr_t offset) { - xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); + xfs_caddr_t orig_offset = bp->b_addr; int orig_len = bp->b_buffer_length; int error, error2; - error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); + error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); if (error) return error; error = xlog_bread_noalign(log, blk_no, nbblks, bp); /* must reset buffer pointer even on error */ - error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); + error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); if (error) return error; return error2; @@ -266,11 +264,9 @@ xlog_bwrite( XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); XFS_BUF_ZEROFLAGS(bp); - XFS_BUF_BUSY(bp); - XFS_BUF_HOLD(bp); + xfs_buf_hold(bp); xfs_buf_lock(bp); XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); - XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); if ((error = xfs_bwrite(log->l_mp, bp))) xfs_ioerror_alert("xlog_bwrite", log->l_mp, @@ -360,7 +356,7 @@ STATIC void xlog_recover_iodone( struct xfs_buf *bp) { - if (XFS_BUF_GETERROR(bp)) { + if (bp->b_error) { /* * We're not going to bother about retrying * this during recovery. One strike! @@ -1262,7 +1258,7 @@ xlog_write_log_records( */ ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { - offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); + offset = bp->b_addr + BBTOB(ealign - start_block); error = xlog_bread_offset(log, ealign, sectbb, bp, offset); if (error) @@ -2135,15 +2131,16 @@ xlog_recover_buffer_pass2( bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, buf_flags); - if (XFS_BUF_ISERROR(bp)) { + if (!bp) + return XFS_ERROR(ENOMEM); + error = bp->b_error; + if (error) { xfs_ioerror_alert("xlog_recover_do..(read#1)", mp, bp, buf_f->blf_blkno); - error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); return error; } - error = 0; if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); } else if (buf_f->blf_flags & @@ -2227,14 +2224,17 @@ xlog_recover_inode_pass2( bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, XBF_LOCK); - if (XFS_BUF_ISERROR(bp)) { + if (!bp) { + error = ENOMEM; + goto error; + } + error = bp->b_error; + if (error) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, bp, in_f->ilf_blkno); - error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); goto error; } - error = 0; ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); @@ -3437,7 +3437,7 @@ xlog_do_recovery_pass( /* * Check for header wrapping around physical end-of-log */ - offset = XFS_BUF_PTR(hbp); + offset = hbp->b_addr; split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { @@ -3497,7 +3497,7 @@ xlog_do_recovery_pass( } else { /* This log record is split across the * physical end of log */ - offset = XFS_BUF_PTR(dbp); + offset = dbp->b_addr; split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 092e16ae4d9d..0081657ad985 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1615,7 +1615,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) XFS_BUF_UNDELAYWRITE(sbp); XFS_BUF_WRITE(sbp); XFS_BUF_UNASYNC(sbp); - ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); + ASSERT(sbp->b_target == mp->m_ddev_targp); xfsbdstrat(mp, sbp); error = xfs_buf_iowait(sbp); if (error) @@ -1938,7 +1938,7 @@ xfs_getsb( xfs_buf_lock(bp); } - XFS_BUF_HOLD(bp); + xfs_buf_hold(bp); ASSERT(XFS_BUF_ISDONE(bp)); return bp; } diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 8f76fdff4f46..35561a511b57 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -168,7 +168,7 @@ error_cancel: xfs_trans_cancel(tp, cancelflags); goto error; } - memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); + memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); /* * Commit the transaction. @@ -883,7 +883,7 @@ xfs_rtbuf_get( if (error) { return error; } - ASSERT(bp && !XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); *bpp = bp; return 0; } @@ -943,7 +943,7 @@ xfs_rtcheck_range( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Compute the starting word's address, and starting bit. */ @@ -994,7 +994,7 @@ xfs_rtcheck_range( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -1040,7 +1040,7 @@ xfs_rtcheck_range( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -1158,7 +1158,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Get the first word's index & point to it. */ @@ -1210,7 +1210,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; word = XFS_BLOCKWMASK(mp); b = &bufp[word]; } else { @@ -1256,7 +1256,7 @@ xfs_rtfind_back( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; word = XFS_BLOCKWMASK(mp); b = &bufp[word]; } else { @@ -1333,7 +1333,7 @@ xfs_rtfind_forw( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Get the first word's index & point to it. */ @@ -1384,7 +1384,7 @@ xfs_rtfind_forw( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -1429,7 +1429,7 @@ xfs_rtfind_forw( if (error) { return error; } - b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + b = bufp = bp->b_addr; word = 0; } else { /* @@ -1649,7 +1649,7 @@ xfs_rtmodify_range( if (error) { return error; } - bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + bufp = bp->b_addr; /* * Compute the starting word's address, and starting bit. */ @@ -1694,7 +1694,7 @@ xfs_rtmodify_range( if (error) { return error; } - first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + first = b = bufp = bp->b_addr; word = 0; } else { /* @@ -1734,7 +1734,7 @@ xfs_rtmodify_range( if (error) { return error; } - first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp); + first = b = bufp = bp->b_addr; word = 0; } else { /* @@ -1832,8 +1832,8 @@ xfs_rtmodify_summary( */ sp = XFS_SUMPTR(mp, bp, so); *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)), - (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1)); + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), + (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); return 0; } diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 09e1f4f35e97..f7f3a359c1c5 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -47,7 +47,7 @@ struct xfs_trans; #define XFS_SUMOFFSTOBLOCK(mp,s) \ (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog) #define XFS_SUMPTR(mp,bp,so) \ - ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \ + ((xfs_suminfo_t *)((bp)->b_addr + \ (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp)))) #define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log) diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index d6d6fdfe9422..c96a8a05ac03 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -104,9 +104,9 @@ xfs_ioerror_alert( xfs_alert(mp, "I/O error occurred: meta-data dev %s block 0x%llx" " (\"%s\") error %d buf count %zd", - XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), + xfs_buf_target_name(bp->b_target), (__uint64_t)blkno, func, - XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); + bp->b_error, XFS_BUF_COUNT(bp)); } /* @@ -137,8 +137,8 @@ xfs_read_buf( bp = xfs_buf_read(target, blkno, len, flags); if (!bp) return XFS_ERROR(EIO); - error = XFS_BUF_GETERROR(bp); - if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) { + error = bp->b_error; + if (!error && !XFS_FORCED_SHUTDOWN(mp)) { *bpp = bp; } else { *bpp = NULL; diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1eb2ba586814..cb6ae715814a 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) #define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */ #define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR) -#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)XFS_BUF_PTR(bp)) +#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr)) #define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d)) #define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 43233e92f0f6..c15aa29fa169 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -299,7 +299,7 @@ xfs_trans_ail_cursor_last( * Splice the log item list into the AIL at the given LSN. We splice to the * tail of the given LSN to maintain insert order for push traversals. The * cursor is optional, allowing repeated updates to the same LSN to avoid - * repeated traversals. + * repeated traversals. This should not be called with an empty list. */ static void xfs_ail_splice( @@ -308,50 +308,39 @@ xfs_ail_splice( struct list_head *list, xfs_lsn_t lsn) { - struct xfs_log_item *lip = cur ? cur->item : NULL; - struct xfs_log_item *next_lip; + struct xfs_log_item *lip; + + ASSERT(!list_empty(list)); /* - * Get a new cursor if we don't have a placeholder or the existing one - * has been invalidated. + * Use the cursor to determine the insertion point if one is + * provided. If not, or if the one we got is not valid, + * find the place in the AIL where the items belong. */ - if (!lip || (__psint_t)lip & 1) { + lip = cur ? cur->item : NULL; + if (!lip || (__psint_t) lip & 1) lip = __xfs_trans_ail_cursor_last(ailp, lsn); - if (!lip) { - /* The list is empty, so just splice and return. */ - if (cur) - cur->item = NULL; - list_splice(list, &ailp->xa_ail); - return; - } - } + /* + * If a cursor is provided, we know we're processing the AIL + * in lsn order, and future items to be spliced in will + * follow the last one being inserted now. Update the + * cursor to point to that last item, now while we have a + * reliable pointer to it. + */ + if (cur) + cur->item = list_entry(list->prev, struct xfs_log_item, li_ail); /* - * Our cursor points to the item we want to insert _after_, so we have - * to update the cursor to point to the end of the list we are splicing - * in so that it points to the correct location for the next splice. - * i.e. before the splice - * - * lsn -> lsn -> lsn + x -> lsn + x ... - * ^ - * | cursor points here - * - * After the splice we have: - * - * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ... - * ^ ^ - * | cursor points here | needs to move here - * - * So we set the cursor to the last item in the list to be spliced - * before we execute the splice, resulting in the cursor pointing to - * the correct item after the splice occurs. + * Finally perform the splice. Unless the AIL was empty, + * lip points to the item in the AIL _after_ which the new + * items should go. If lip is null the AIL was empty, so + * the new items go at the head of the AIL. */ - if (cur) { - next_lip = list_entry(list->prev, struct xfs_log_item, li_ail); - cur->item = next_lip; - } - list_splice(list, &lip->li_ail); + if (lip) + list_splice(list, &lip->li_ail); + else + list_splice(list, &ailp->xa_ail); } /* @@ -682,6 +671,7 @@ xfs_trans_ail_update_bulk( int i; LIST_HEAD(tmp); + ASSERT(nr_items > 0); /* Not required, but true. */ mlip = xfs_ail_min(ailp); for (i = 0; i < nr_items; i++) { @@ -701,7 +691,8 @@ xfs_trans_ail_update_bulk( list_add(&lip->li_ail, &tmp); } - xfs_ail_splice(ailp, cur, &tmp, lsn); + if (!list_empty(&tmp)) + xfs_ail_splice(ailp, cur, &tmp, lsn); if (!mlip_changed) { spin_unlock(&ailp->xa_lock); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 15584fc3ed7d..137e2b9e2948 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -54,7 +54,7 @@ xfs_trans_buf_item_match( list_for_each_entry(lidp, &tp->t_items, lid_trans) { blip = (struct xfs_buf_log_item *)lidp->lid_item; if (blip->bli_item.li_type == XFS_LI_BUF && - XFS_BUF_TARGET(blip->bli_buf) == target && + blip->bli_buf->b_target == target && XFS_BUF_ADDR(blip->bli_buf) == blkno && XFS_BUF_COUNT(blip->bli_buf) == len) return blip->bli_buf; @@ -80,7 +80,6 @@ _xfs_trans_bjoin( { struct xfs_buf_log_item *bip; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == NULL); /* @@ -194,7 +193,7 @@ xfs_trans_get_buf(xfs_trans_t *tp, return NULL; } - ASSERT(!XFS_BUF_GETERROR(bp)); + ASSERT(!bp->b_error); _xfs_trans_bjoin(tp, bp, 1); trace_xfs_trans_get_buf(bp->b_fspriv); @@ -293,10 +292,10 @@ xfs_trans_read_buf( return (flags & XBF_TRYLOCK) ? EAGAIN : XFS_ERROR(ENOMEM); - if (XFS_BUF_GETERROR(bp) != 0) { + if (bp->b_error) { + error = bp->b_error; xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); - error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); return error; } @@ -330,7 +329,7 @@ xfs_trans_read_buf( ASSERT(xfs_buf_islocked(bp)); ASSERT(bp->b_transp == tp); ASSERT(bp->b_fspriv != NULL); - ASSERT((XFS_BUF_ISERROR(bp)) == 0); + ASSERT(!bp->b_error); if (!(XFS_BUF_ISDONE(bp))) { trace_xfs_trans_read_buf_io(bp, _RET_IP_); ASSERT(!XFS_BUF_ISASYNC(bp)); @@ -386,10 +385,9 @@ xfs_trans_read_buf( return (flags & XBF_TRYLOCK) ? 0 : XFS_ERROR(ENOMEM); } - if (XFS_BUF_GETERROR(bp) != 0) { - XFS_BUF_SUPER_STALE(bp); - error = XFS_BUF_GETERROR(bp); - + if (bp->b_error) { + error = bp->b_error; + XFS_BUF_SUPER_STALE(bp); xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); if (tp->t_flags & XFS_TRANS_DIRTY) @@ -430,7 +428,7 @@ shutdown_abort: if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); #endif - ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != + ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) != (XBF_STALE|XBF_DELWRI)); trace_xfs_trans_read_buf_shut(bp, _RET_IP_); @@ -581,7 +579,6 @@ xfs_trans_bhold(xfs_trans_t *tp, { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); @@ -602,7 +599,6 @@ xfs_trans_bhold_release(xfs_trans_t *tp, { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); @@ -631,7 +627,6 @@ xfs_trans_log_buf(xfs_trans_t *tp, { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); @@ -702,7 +697,6 @@ xfs_trans_binval( { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -774,7 +768,6 @@ xfs_trans_inode_buf( { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -798,7 +791,6 @@ xfs_trans_stale_inode_buf( { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -823,7 +815,6 @@ xfs_trans_inode_alloc_buf( { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); @@ -851,7 +842,6 @@ xfs_trans_dquot_buf( { xfs_buf_log_item_t *bip = bp->b_fspriv; - ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(bp->b_transp == tp); ASSERT(bip != NULL); ASSERT(type == XFS_BLF_UDQUOT_BUF || diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 9322e13f0c63..51fc429527bc 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -83,7 +83,9 @@ xfs_readlink_bmap( bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK); - error = XFS_BUF_GETERROR(bp); + if (!bp) + return XFS_ERROR(ENOMEM); + error = bp->b_error; if (error) { xfs_ioerror_alert("xfs_readlink", ip->i_mount, bp, XFS_BUF_ADDR(bp)); @@ -94,7 +96,7 @@ xfs_readlink_bmap( byte_cnt = pathlen; pathlen -= byte_cnt; - memcpy(link, XFS_BUF_PTR(bp), byte_cnt); + memcpy(link, bp->b_addr, byte_cnt); xfs_buf_relse(bp); } @@ -1648,13 +1650,13 @@ xfs_symlink( byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); - ASSERT(bp && !XFS_BUF_GETERROR(bp)); + ASSERT(!xfs_buf_geterror(bp)); if (pathlen < byte_cnt) { byte_cnt = pathlen; } pathlen -= byte_cnt; - memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt); + memcpy(bp->b_addr, cur_chunk, byte_cnt); cur_chunk += byte_cnt; xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1); @@ -1999,7 +2001,7 @@ xfs_zero_remaining_bytes( mp, bp, XFS_BUF_ADDR(bp)); break; } - memset(XFS_BUF_PTR(bp) + + memset(bp->b_addr + (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 0, lastoffset - offset + 1); XFS_BUF_UNDONE(bp); |