summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-01-24 16:13:08 -0500
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:03:59 -0400
commitd1310b2e0cd98eb1348553e69b73827b436dca7b (patch)
treef55e9c380df0f3a7e9fb792b2308b52b632b90aa
parent5f56406aabdf5444d040c5955effc665b1d0dbaf (diff)
Btrfs: Split the extent_map code into two parts
There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_buffers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/Makefile3
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/disk-io.c123
-rw-r--r--fs/btrfs/extent-tree.c43
-rw-r--r--fs/btrfs/extent_io.c3089
-rw-r--r--fs/btrfs/extent_io.h193
-rw-r--r--fs/btrfs/extent_map.c3119
-rw-r--r--fs/btrfs/extent_map.h214
-rw-r--r--fs/btrfs/file.c29
-rw-r--r--fs/btrfs/inode.c175
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/transaction.c8
-rw-r--r--fs/btrfs/transaction.h2
14 files changed, 3612 insertions, 3413 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 72cc3e16741f..9c61609f3714 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -5,7 +5,8 @@ obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
- extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o
+ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+ extent_io.o
ifeq ($(CONFIG_FS_POSIX_ACL),y)
btrfs-y += acl.o
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9fc99883c70a..9a0647dc5a0b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -20,6 +20,7 @@
#define __BTRFS_I__
#include "extent_map.h"
+#include "extent_io.h"
/* in memory btrfs inode */
struct btrfs_inode {
@@ -27,6 +28,7 @@ struct btrfs_inode {
struct btrfs_block_group_cache *block_group;
struct btrfs_key location;
struct extent_map_tree extent_tree;
+ struct extent_io_tree io_tree;
struct inode vfs_inode;
u64 ordered_trans;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9f2975b55c9..6c65473e0fe3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -27,6 +27,7 @@
#include <linux/completion.h>
#include <asm/kmap_types.h>
#include "bit-radix.h"
+#include "extent_io.h"
#include "extent_map.h"
struct btrfs_trans_handle;
@@ -314,11 +315,11 @@ struct btrfs_fs_info {
struct btrfs_root *tree_root;
struct radix_tree_root fs_roots_radix;
- struct extent_map_tree free_space_cache;
- struct extent_map_tree block_group_cache;
- struct extent_map_tree pinned_extents;
- struct extent_map_tree pending_del;
- struct extent_map_tree extent_ins;
+ struct extent_io_tree free_space_cache;
+ struct extent_io_tree block_group_cache;
+ struct extent_io_tree pinned_extents;
+ struct extent_io_tree pending_del;
+ struct extent_io_tree extent_ins;
u64 generation;
u64 last_trans_committed;
@@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
u64 first_extent);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy);
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
btrfs_fs_info *info,
u64 bytenr);
@@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
u64 owner_objectid, u64 owner_offset, int pin);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct extent_map_tree *unpin);
+ struct extent_io_tree *unpin);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5d1f9bca2712..4c4ebea0b2a9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
}
#endif
-static struct extent_map_ops btree_extent_map_ops;
+static struct extent_io_ops btree_extent_io_ops;
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+ eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
bytenr, blocksize, GFP_NOFS);
return eb;
}
@@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
- eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
+ eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
bytenr, blocksize, NULL, GFP_NOFS);
return eb;
}
struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
- size_t page_offset, u64 start, u64 end,
+ size_t page_offset, u64 start, u64 len,
int create)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
@@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page,
int ret;
again:
- em = lookup_extent_mapping(em_tree, start, end);
+ spin_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, start, len);
+ spin_unlock(&em_tree->lock);
if (em) {
goto out;
}
@@ -85,11 +87,14 @@ again:
goto out;
}
em->start = 0;
- em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1;
+ em->len = i_size_read(inode);
em->block_start = 0;
- em->block_end = em->end;
em->bdev = inode->i_sb->s_bdev;
+
+ spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ spin_unlock(&em_tree->lock);
+
if (ret == -EEXIST) {
free_extent_map(em);
em = NULL;
@@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
- struct extent_map_tree *tree;
+ struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
int found_level;
unsigned long len;
struct extent_buffer *eb;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE)
goto out;
@@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end)
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
return extent_write_full_page(tree, page, btree_get_extent, wbc);
}
static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(mapping->host)->io_tree;
if (wbc->sync_mode == WB_SYNC_NONE) {
u64 num_dirty;
u64 start = 0;
@@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping,
int btree_readpage(struct file *file, struct page *page)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
return extent_read_full_page(tree, page, btree_get_extent);
}
static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
- struct extent_map_tree *tree;
+ struct extent_io_tree *tree;
+ struct extent_map_tree *map;
int ret;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
- ret = try_release_extent_mapping(tree, page);
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+ map = &BTRFS_I(page->mapping->host)->extent_tree;
+ ret = try_release_extent_mapping(map, tree, page);
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
@@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
static void btree_invalidatepage(struct page *page, unsigned long offset)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
extent_invalidatepage(tree, page, offset);
btree_releasepage(page, GFP_NOFS);
}
@@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return 0;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
+ read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, 0);
free_extent_buffer(buf);
return ret;
@@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
{
struct extent_buffer *buf = NULL;
struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_map_tree *extent_tree;
+ struct extent_io_tree *io_tree;
u64 end;
int ret;
- extent_tree = &BTRFS_I(btree_inode)->extent_tree;
+ io_tree = &BTRFS_I(btree_inode)->io_tree;
buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!buf)
return NULL;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
- buf, 0, 1);
+ read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1);
if (buf->flags & EXTENT_CSUM)
return buf;
end = buf->start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
+ if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
buf->flags |= EXTENT_CSUM;
return buf;
}
- lock_extent(extent_tree, buf->start, end, GFP_NOFS);
+ lock_extent(io_tree, buf->start, end, GFP_NOFS);
- if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) {
+ if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) {
buf->flags |= EXTENT_CSUM;
goto out_unlock;
}
ret = csum_tree_block(root, buf, 1);
- set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
+ set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS);
buf->flags |= EXTENT_CSUM;
out_unlock:
- unlock_extent(extent_tree, buf->start, end, GFP_NOFS);
+ unlock_extent(io_tree, buf->start, end, GFP_NOFS);
return buf;
}
@@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid)
- clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree,
+ clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
return 0;
}
@@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root,
struct extent_buffer *buf)
{
struct inode *btree_inode = root->fs_info->btree_inode;
- wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree,
+ wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree,
buf);
return 0;
}
@@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb)
fs_info->btree_inode->i_nlink = 1;
fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
- extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
+ extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
fs_info->btree_inode->i_mapping,
GFP_NOFS);
- BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops;
+ extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree,
+ GFP_NOFS);
+
+ BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
- extent_map_tree_init(&fs_info->free_space_cache,
+ extent_io_tree_init(&fs_info->free_space_cache,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_map_tree_init(&fs_info->block_group_cache,
+ extent_io_tree_init(&fs_info->block_group_cache,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_map_tree_init(&fs_info->pinned_extents,
+ extent_io_tree_init(&fs_info->pinned_extents,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_map_tree_init(&fs_info->pending_del,
+ extent_io_tree_init(&fs_info->pending_del,
fs_info->btree_inode->i_mapping, GFP_NOFS);
- extent_map_tree_init(&fs_info->extent_ins,
+ extent_io_tree_init(&fs_info->extent_ins,
fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->do_barriers = 1;
fs_info->closing = 0;
@@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
if (!btrfs_test_opt(root, NOBARRIER))
blkdev_issue_flush(sb->s_bdev, NULL);
- set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super);
+ set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super);
ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping,
super->start, super->len);
if (!btrfs_test_opt(root, NOBARRIER))
@@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root)
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
- extent_map_tree_empty_lru(&fs_info->free_space_cache);
- extent_map_tree_empty_lru(&fs_info->block_group_cache);
- extent_map_tree_empty_lru(&fs_info->pinned_extents);
- extent_map_tree_empty_lru(&fs_info->pending_del);
- extent_map_tree_empty_lru(&fs_info->extent_ins);
- extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+ extent_io_tree_empty_lru(&fs_info->free_space_cache);
+ extent_io_tree_empty_lru(&fs_info->block_group_cache);
+ extent_io_tree_empty_lru(&fs_info->pinned_extents);
+ extent_io_tree_empty_lru(&fs_info->pending_del);
+ extent_io_tree_empty_lru(&fs_info->extent_ins);
+ extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
@@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root)
int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
struct inode *btree_inode = buf->first_page->mapping->host;
- return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
+ return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf);
}
int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
struct inode *btree_inode = buf->first_page->mapping->host;
- return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
+ return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree,
buf);
}
@@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
transid, root->fs_info->generation);
WARN_ON(1);
}
- set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf);
+ set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
}
void btrfs_throttle(struct btrfs_root *root)
@@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
+ set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}
@@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
+ set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
GFP_NOFS);
}
@@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
+ return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}
@@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
+ return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG_DONE, 0);
}
@@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
+ return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG_DONE, GFP_NOFS);
}
@@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
+ return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG, GFP_NOFS);
}
@@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
- return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
+ return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, 1);
}
-static struct extent_map_ops btree_extent_map_ops = {
+static struct extent_io_ops btree_extent_io_ops = {
.writepage_io_hook = btree_writepage_io_hook,
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b69a46691a96..1cf125ab7822 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root,
int ret;
struct btrfs_key key;
struct extent_buffer *leaf;
- struct extent_map_tree *free_space_cache;
+ struct extent_io_tree *free_space_cache;
int slot;
u64 last = 0;
u64 hole_size;
@@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
btrfs_fs_info *info,
u64 bytenr)
{
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_block_group_cache *block_group = NULL;
u64 ptr;
u64 start;
@@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int data, int owner)
{
struct btrfs_block_group_cache *cache;
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_block_group_cache *found_group = NULL;
struct btrfs_fs_info *info = root->fs_info;
u64 used;
@@ -951,7 +951,7 @@ fail:
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_block_group_cache *cache;
int ret;
int err = 0;
@@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root,
return 0;
}
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
+int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
{
u64 last = 0;
u64 start;
u64 end;
- struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
+ struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
int ret;
while(1) {
@@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct extent_map_tree *unpin)
+ struct extent_io_tree *unpin)
{
u64 start;
u64 end;
int ret;
- struct extent_map_tree *free_space_cache;
+ struct extent_io_tree *free_space_cache;
free_space_cache = &root->fs_info->free_space_cache;
while(1) {
@@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
int err = 0;
u64 start;
u64 end;
- struct extent_map_tree *pending_del;
- struct extent_map_tree *pinned_extents;
+ struct extent_io_tree *pending_del;
+ struct extent_io_tree *pinned_extents;
pending_del = &extent_root->fs_info->pending_del;
pinned_extents = &extent_root->fs_info->pinned_extents;
@@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
- set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
+ set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_CSUM, GFP_NOFS);
buf->flags |= EXTENT_CSUM;
@@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
unsigned long i;
struct page *page;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct file_ra_state *ra;
ra = kzalloc(sizeof(*ra), GFP_NOFS);
@@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ lock_extent(io_tree, page_start, page_end, GFP_NOFS);
delalloc_start = page_start;
- existing_delalloc =
- count_range_bits(&BTRFS_I(inode)->extent_tree,
- &delalloc_start, page_end,
- PAGE_CACHE_SIZE, EXTENT_DELALLOC);
+ existing_delalloc = count_range_bits(io_tree,
+ &delalloc_start, page_end,
+ PAGE_CACHE_SIZE, EXTENT_DELALLOC);
- set_extent_delalloc(em_tree, page_start,
+ set_extent_delalloc(io_tree, page_start,
page_end, GFP_NOFS);
spin_lock(&root->fs_info->delalloc_lock);
@@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
existing_delalloc;
spin_unlock(&root->fs_info->delalloc_lock);
- unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
@@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
u64 cur_byte;
u64 total_found;
struct btrfs_fs_info *info = root->fs_info;
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
@@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *cache;
struct btrfs_block_group_item *item;
struct btrfs_fs_info *info = root->fs_info;
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_key key;
struct extent_buffer *leaf;
int ret;
@@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
int bit;
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info;
- struct extent_map_tree *block_group_cache;
+ struct extent_io_tree *block_group_cache;
struct btrfs_key key;
struct btrfs_key found_key;
struct extent_buffer *leaf;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
new file mode 100644
index 000000000000..15cc158a0498
--- /dev/null
+++ b/fs/btrfs/extent_io.c
@@ -0,0 +1,3089 @@
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/bio.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/page-flags.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/version.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "extent_io.h"
+#include "extent_map.h"
+
+/* temporary declaration until extent_map moves out of btrfs */
+struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
+				       unsigned long extra_flags,
+				       void (*ctor)(void *, struct kmem_cache *,
+						    unsigned long));
+
+/* slab caches for struct extent_state and struct extent_buffer */
+static struct kmem_cache *extent_state_cache;
+static struct kmem_cache *extent_buffer_cache;
+
+/* NOTE(review): no user of 'buffers' is visible in this part of the file */
+static LIST_HEAD(buffers);
+/*
+ * every extent_state handed out by alloc_extent_state() is linked here
+ * until free_extent_state() drops the last reference, so that
+ * extent_io_exit() can report leaks.
+ */
+static LIST_HEAD(states);
+
+/*
+ * protects the 'states' list.  DEFINE_SPINLOCK is used instead of the
+ * deprecated SPIN_LOCK_UNLOCKED initializer so the lock gets its own
+ * lockdep class.
+ */
+static DEFINE_SPINLOCK(state_lock);
+/* presumably the cap on a tree's buffer_lru length -- confirm in the LRU code */
+#define BUFFER_LRU_MAX 64
+
+/*
+ * Generic view of an rbtree element used by tree_insert() and
+ * __tree_search(): an inclusive [start, end] range, an in_tree flag that
+ * tree_insert() sets to 1 when the node is linked, and the embedded
+ * rb_node.
+ *
+ * insert_state() and split_state() pass the rb_node embedded in a
+ * struct extent_state to tree_insert(), which converts it back with
+ * rb_entry(..., struct tree_entry, rb_node).  NOTE(review): this presumably
+ * requires the leading fields of struct extent_state to have exactly this
+ * layout -- confirm against extent_io.h before reordering anything here.
+ */
+struct tree_entry {
+ u64 start;
+ u64 end;
+ int in_tree;
+ struct rb_node rb_node;
+};
+
+/*
+ * Bundle of a bio, the extent_io_tree it belongs to and a get_extent_t
+ * callback.  NOTE(review): its users are not visible in this part of the
+ * file; presumably it is the per-call context for the page write-out
+ * helpers -- confirm against them.
+ */
+struct extent_page_data {
+ struct bio *bio;
+ struct extent_io_tree *tree;
+ get_extent_t *get_extent;
+};
+
+/*
+ * Create the slab caches backing struct extent_state and
+ * struct extent_buffer.
+ *
+ * Returns 0 on success or -ENOMEM if either cache cannot be created.  On
+ * failure no cache is left allocated and extent_state_cache is reset to
+ * NULL, so a later extent_io_exit() (which tests the pointer before
+ * destroying it) cannot destroy the same cache twice.
+ */
+int __init extent_io_init(void)
+{
+	extent_state_cache = btrfs_cache_create("extent_state",
+					    sizeof(struct extent_state), 0,
+					    NULL);
+	if (!extent_state_cache)
+		return -ENOMEM;
+
+	extent_buffer_cache = btrfs_cache_create("extent_buffers",
+					    sizeof(struct extent_buffer), 0,
+					    NULL);
+	if (!extent_buffer_cache)
+		goto free_state_cache;
+	return 0;
+
+free_state_cache:
+	kmem_cache_destroy(extent_state_cache);
+	/* don't leave a dangling pointer behind for extent_io_exit() */
+	extent_state_cache = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Teardown counterpart of extent_io_init(): report and release every
+ * extent_state still linked on the global 'states' list, then destroy
+ * whichever of the two slab caches exist.
+ */
+void extent_io_exit(void)
+{
+	struct extent_state *leaked;
+
+	for (;;) {
+		if (list_empty(&states))
+			break;
+
+		leaked = list_entry(states.next, struct extent_state, list);
+		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n",
+		       leaked->start, leaked->end, leaked->state,
+		       leaked->in_tree, atomic_read(&leaked->refs));
+		list_del(&leaked->list);
+		kmem_cache_free(extent_state_cache, leaked);
+	}
+
+	if (extent_state_cache)
+		kmem_cache_destroy(extent_state_cache);
+	if (extent_buffer_cache)
+		kmem_cache_destroy(extent_buffer_cache);
+}
+
+/*
+ * Put an extent_io_tree into its empty state: no states in the rbtree, no
+ * ops, zero dirty bytes, an empty buffer LRU, and 'mapping' recorded as the
+ * address space the tree works on.  'mask' is accepted but not used.
+ */
+void extent_io_tree_init(struct extent_io_tree *tree,
+			 struct address_space *mapping, gfp_t mask)
+{
+	/* locks first */
+	rwlock_init(&tree->lock);
+	spin_lock_init(&tree->lru_lock);
+
+	/* state tracking */
+	tree->state.rb_node = NULL;
+	tree->dirty_bytes = 0;
+	tree->ops = NULL;
+	tree->mapping = mapping;
+
+	/* extent_buffer LRU */
+	INIT_LIST_HEAD(&tree->buffer_lru);
+	tree->lru_size = 0;
+}
+EXPORT_SYMBOL(extent_io_tree_init);
+
+/*
+ * Unlink every extent_buffer from the tree's buffer_lru list and call
+ * free_extent_buffer() on each.
+ *
+ * NOTE(review): neither lru_lock nor lru_size is touched here; the only
+ * visible callers are in close_ctree() -- confirm that is safe elsewhere.
+ */
+void extent_io_tree_empty_lru(struct extent_io_tree *tree)
+{
+	struct list_head *head = &tree->buffer_lru;
+
+	while (!list_empty(head)) {
+		struct extent_buffer *victim;
+
+		victim = list_entry(head->next, struct extent_buffer, lru);
+		list_del_init(&victim->lru);
+		free_extent_buffer(victim);
+	}
+}
+EXPORT_SYMBOL(extent_io_tree_empty_lru);
+
+/*
+ * Allocate an extent_state from the slab cache with the given gfp mask.
+ * The new state has no bits set, is not in any tree, holds one reference
+ * and is linked onto the global 'states' list for leak tracking.
+ *
+ * A NULL or error-pointer result from the allocator is handed back to the
+ * caller unchanged.
+ */
+struct extent_state *alloc_extent_state(gfp_t mask)
+{
+	unsigned long irq_flags;
+	struct extent_state *es;
+
+	es = kmem_cache_alloc(extent_state_cache, mask);
+	if (!es || IS_ERR(es))
+		return es;
+
+	es->private = 0;
+	es->in_tree = 0;
+	es->state = 0;
+
+	spin_lock_irqsave(&state_lock, irq_flags);
+	list_add(&es->list, &states);
+	spin_unlock_irqrestore(&state_lock, irq_flags);
+
+	atomic_set(&es->refs, 1);
+	init_waitqueue_head(&es->wq);
+	return es;
+}
+EXPORT_SYMBOL(alloc_extent_state);
+
+/*
+ * Drop one reference on 'state' (NULL is allowed and ignored).  When the
+ * last reference goes away the state is unlinked from the global 'states'
+ * list and returned to the slab cache; it is expected to be out of the
+ * tree by then, otherwise a warning is emitted.
+ */
+void free_extent_state(struct extent_state *state)
+{
+	unsigned long irq_flags;
+
+	if (!state)
+		return;
+	if (!atomic_dec_and_test(&state->refs))
+		return;
+
+	WARN_ON(state->in_tree);
+
+	spin_lock_irqsave(&state_lock, irq_flags);
+	list_del(&state->list);
+	spin_unlock_irqrestore(&state_lock, irq_flags);
+
+	kmem_cache_free(extent_state_cache, state);
+}
+EXPORT_SYMBOL(free_extent_state);
+
+/*
+ * Link 'node' into the rbtree under 'root', using 'offset' as the search
+ * key against each existing entry's inclusive [start, end] range.
+ *
+ * Returns NULL after a successful insert (the entry's in_tree flag is set
+ * to 1).  If 'offset' falls inside an existing entry nothing is inserted
+ * and that entry's rb_node is returned instead.
+ */
+static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
+				   struct rb_node *node)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct tree_entry *cur;
+
+	while (*link) {
+		parent = *link;
+		cur = rb_entry(parent, struct tree_entry, rb_node);
+
+		if (offset < cur->start) {
+			link = &parent->rb_left;
+			continue;
+		}
+		if (offset > cur->end) {
+			link = &parent->rb_right;
+			continue;
+		}
+		/* offset is covered by an entry that is already there */
+		return parent;
+	}
+
+	cur = rb_entry(node, struct tree_entry, rb_node);
+	cur->in_tree = 1;
+	rb_link_node(node, parent, link);
+	rb_insert_color(node, root);
+	return NULL;
+}
+
+/*
+ * Search the rbtree under 'root' for the entry whose inclusive
+ * [start, end] range contains 'offset'.
+ *
+ * Returns that entry's rb_node on a hit; in that case neither *prev_ret
+ * nor *next_ret is written.
+ *
+ * Returns NULL on a miss.  Then, starting from the last node visited on
+ * the way down:
+ *  - if prev_ret is non-NULL, *prev_ret is set by walking forward with
+ *    rb_next() to the first entry whose end is >= offset (NULL if the walk
+ *    runs off the tree or the tree is empty).  Despite the name this is
+ *    the entry following 'offset'.
+ *  - if next_ret is non-NULL, *next_ret is set by walking backward with
+ *    rb_prev() to the first entry whose start is <= offset (NULL if there
+ *    is none).
+ */
+static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
+ struct rb_node **prev_ret,
+ struct rb_node **next_ret)
+{
+ struct rb_node * n = root->rb_node;
+ struct rb_node *prev = NULL;
+ struct rb_node *orig_prev = NULL;
+ struct tree_entry *entry;
+ struct tree_entry *prev_entry = NULL;
+
+ /* ordinary descent; remember the last node looked at */
+ while(n) {
+ entry = rb_entry(n, struct tree_entry, rb_node);
+ prev = n;
+ prev_entry = entry;
+
+ if (offset < entry->start)
+ n = n->rb_left;
+ else if (offset > entry->end)
+ n = n->rb_right;
+ else
+ return n;
+ }
+
+ if (prev_ret) {
+ /* keep the descent's end point so the next_ret walk can reuse it */
+ orig_prev = prev;
+ while(prev && offset > prev_entry->end) {
+ prev = rb_next(prev);
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ }
+ *prev_ret = prev;
+ prev = orig_prev;
+ }
+
+ if (next_ret) {
+ /* prev may be NULL here; prev_entry is only dereferenced after the prev check */
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ while(prev && offset < prev_entry->start) {
+ prev = rb_prev(prev);
+ prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ }
+ *next_ret = prev;
+ }
+ return NULL;
+}
+
+/*
+ * Return the node whose range contains 'offset'; if there is none, return
+ * whatever __tree_search() reports through its prev_ret argument (which
+ * may be NULL).
+ */
+static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
+{
+	struct rb_node *fallback;
+	struct rb_node *hit = __tree_search(root, offset, &fallback, NULL);
+
+	return hit ? hit : fallback;
+}
+
+/*
+ * utility function to merge 'state' with its immediate neighbours in the
+ * tree.  A neighbour is merged when it is byte-adjacent to 'state' and has
+ * exactly the same state bits.  Extents with any of the EXTENT_IOBITS set
+ * in their state field are not merged because the end_io handlers need to
+ * be able to do operations on them without sleeping (or doing
+ * allocations/splits).
+ *
+ * Merging with the previous extent grows 'state' downwards and removes the
+ * previous extent.  Merging with the next extent grows the *next* extent
+ * downwards and removes 'state' itself: 'state' is erased from the tree
+ * and a reference on it is dropped, so callers must not assume 'state' is
+ * still in the tree after this returns.
+ *
+ * This should be called with the tree lock held.  Always returns 0.
+ */
+static int merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *other;
+ struct rb_node *other_node;
+
+ if (state->state & EXTENT_IOBITS)
+ return 0;
+
+ /* absorb the preceding extent into 'state' */
+ other_node = rb_prev(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->end == state->start - 1 &&
+ other->state == state->state) {
+ state->start = other->start;
+ other->in_tree = 0;
+ rb_erase(&other->rb_node, &tree->state);
+ free_extent_state(other);
+ }
+ }
+ /* let the following extent absorb 'state' */
+ other_node = rb_next(&state->rb_node);
+ if (other_node) {
+ other = rb_entry(other_node, struct extent_state, rb_node);
+ if (other->start == state->end + 1 &&
+ other->state == state->state) {
+ other->start = state->start;
+ state->in_tree = 0;
+ rb_erase(&state->rb_node, &tree->state);
+ free_extent_state(state);
+ }
+ }
+ return 0;
+}
+
+/*
+ * insert an extent_state struct into the tree.  'bits' are set on the
+ * struct before it is inserted, and [start, end] (inclusive) becomes its
+ * range.
+ *
+ * Returns 0 on success.  This may return -EEXIST if the extent is already
+ * there, in which case the state struct is freed and the tree's
+ * dirty_bytes counter is left untouched.
+ *
+ * On success the new state may have been merged with a neighbour by
+ * merge_state(), so the caller must not rely on 'state' still being the
+ * struct that is linked in the tree.
+ *
+ * The tree lock is not taken internally.  This is a utility function and
+ * probably isn't what you want to call (see set/clear_extent_bit).
+ */
+static int insert_state(struct extent_io_tree *tree,
+			struct extent_state *state, u64 start, u64 end,
+			int bits)
+{
+	struct rb_node *node;
+
+	if (end < start) {
+		printk("end < start %Lu %Lu\n", end, start);
+		WARN_ON(1);
+	}
+	state->state |= bits;
+	state->start = start;
+	state->end = end;
+	node = tree_insert(&tree->state, end, &state->rb_node);
+	if (node) {
+		struct extent_state *found;
+		found = rb_entry(node, struct extent_state, rb_node);
+		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
+		free_extent_state(state);
+		return -EEXIST;
+	}
+	/*
+	 * account the dirty range only once the state is really linked in;
+	 * doing it before tree_insert() would leave dirty_bytes inflated on
+	 * the -EEXIST path.  Uses start/end rather than 'state' because
+	 * merge_state() below may free it.
+	 */
+	if (bits & EXTENT_DIRTY)
+		tree->dirty_bytes += end - start + 1;
+	merge_state(tree, state);
+	return 0;
+}
+
+/*
+ * Cut the extent state 'orig' in two at offset 'split', which must lie
+ * inside it.  The caller supplies 'prealloc', which becomes the new first
+ * half and is inserted into the tree.
+ *
+ * Before the call the tree holds 'orig' at [orig->start, orig->end].
+ * Afterwards it holds:
+ * prealloc: [orig->start, split - 1]
+ * orig: [ split, orig->end ]
+ * with both carrying orig's state bits.
+ *
+ * Returns 0 on success, or -EEXIST if inserting 'prealloc' collides with an
+ * existing entry; 'prealloc' is freed in that case.
+ * NOTE(review): on -EEXIST orig->start has already been advanced to 'split'.
+ *
+ * No tree locks are taken here; the caller must hold them.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+		       struct extent_state *prealloc, u64 split)
+{
+	struct extent_state *found;
+	struct rb_node *clash;
+
+	prealloc->start = orig->start;
+	prealloc->end = split - 1;
+	prealloc->state = orig->state;
+	orig->start = split;
+
+	clash = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+	if (!clash)
+		return 0;
+
+	found = rb_entry(clash, struct extent_state, rb_node);
+	printk("found node %Lu %Lu on insert of %Lu %Lu\n",
+	       found->start, found->end, prealloc->start, prealloc->end);
+	free_extent_state(prealloc);
+	return -EEXIST;
+}
+
+/*
+ * Utility function to clear some bits in an extent state struct.
+ * It optionally wakes anyone waiting on this state (wake != 0), and can
+ * forcibly remove the state from the tree (delete != 0).
+ *
+ * If no bits remain set on the state after clearing, the struct is
+ * removed from the tree and freed; otherwise merge_state() is called on it.
+ * When EXTENT_DIRTY is being cleared from a dirty state, the state's size
+ * is subtracted from tree->dirty_bytes.
+ *
+ * Returns the requested bits that were set on the state before clearing.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+			    struct extent_state *state, int bits, int wake,
+			    int delete)
+{
+	const int was_set = state->state & bits;
+
+	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+		u64 bytes = state->end - state->start + 1;
+
+		WARN_ON(bytes > tree->dirty_bytes);
+		tree->dirty_bytes -= bytes;
+	}
+	state->state &= ~bits;
+	if (wake)
+		wake_up(&state->wq);
+
+	/* bits remain and nobody asked for removal: keep it in the tree */
+	if (!delete && state->state != 0) {
+		merge_state(tree, state);
+		return was_set;
+	}
+
+	/* a state that is about to be dropped is expected to be in the tree */
+	if (!state->in_tree) {
+		WARN_ON(1);
+		return was_set;
+	}
+
+	rb_erase(&state->rb_node, &tree->state);
+	state->in_tree = 0;
+	free_extent_state(state);
+	return was_set;
+}
+
+/*
+ * Clear some bits on a range in the tree.  States that straddle either
+ * edge of the range are split first, so only the part inside the range is
+ * touched.
+ *
+ * The gfp mask indicates which allocations or sleeping are allowed.  When
+ * it allows sleeping (__GFP_WAIT) a spare extent_state is allocated up
+ * front, before the tree lock is taken.  When it does not, a spare is
+ * allocated with GFP_ATOMIC under the lock, and only if a split is
+ * actually needed.
+ *
+ * Pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
+ * the given range from the tree regardless of state (ie for truncate).
+ *
+ * The range [start, end] is inclusive.
+ *
+ * This takes the tree lock.  Returns -ENOMEM if the up-front allocation
+ * fails; otherwise returns the OR of the requested bits that were found
+ * set on the states it cleared (zero if none of them were set).
+ */
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		     int bits, int wake, int delete, gfp_t mask)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct rb_node *node;
+	unsigned long flags;
+	int err;
+	int set = 0;
+
+again:
+	if (!prealloc && (mask & __GFP_WAIT)) {
+		prealloc = alloc_extent_state(mask);
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	write_lock_irqsave(&tree->lock, flags);
+	/*
+	 * this search will find the extents that end after
+	 * our range starts
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node)
+		goto out;
+	state = rb_entry(node, struct extent_state, rb_node);
+	if (state->start > end)
+		goto out;
+	WARN_ON(state->end < start);
+
+	/*
+	 *     | ---- desired range ---- |
+	 *  | state | or
+	 *  | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip
+	 * bits on second half.
+	 *
+	 * If the extent we found extends past our range, we
+	 * just split and search again.  It'll get split again
+	 * the next time through.
+	 *
+	 * If the extent we found is inside our range, we clear
+	 * the desired bit on it.
+	 */
+
+	if (state->start < start) {
+		/*
+		 * Callers that may not sleep arrive here without a spare
+		 * struct; get one atomically rather than handing NULL to
+		 * split_state().
+		 */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		/* split_state() either inserted or freed the spare */
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			start = state->end + 1;
+			set |= clear_state_bit(tree, state, bits,
+					wake, delete);
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *                        | state |
+	 * We need to split the extent, and clear the bit
+	 * on the first half
+	 */
+	if (state->start <= end && state->end > end) {
+		/* same atomic fallback as above for non-sleeping callers */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = split_state(tree, state, prealloc, end + 1);
+		BUG_ON(err == -EEXIST);
+
+		if (wake)
+			wake_up(&state->wq);
+		/* after the split 'prealloc' is the half inside our range */
+		set |= clear_state_bit(tree, prealloc, bits,
+				       wake, delete);
+		prealloc = NULL;
+		goto out;
+	}
+
+	/* the state lies entirely inside the range */
+	start = state->end + 1;
+	set |= clear_state_bit(tree, state, bits, wake, delete);
+	goto search_again;
+
+out:
+	write_unlock_irqrestore(&tree->lock, flags);
+	if (prealloc)
+		free_extent_state(prealloc);
+
+	return set;
+
+search_again:
+	if (start > end)
+		goto out;
+	/* drop the lock between passes so sleeping callers can reschedule */
+	write_unlock_irqrestore(&tree->lock, flags);
+	if (mask & __GFP_WAIT)
+		cond_resched();
+	goto again;
+}
+EXPORT_SYMBOL(clear_extent_bit);
+
+/*
+ * Sleep (uninterruptibly) on state->wq until woken.
+ *
+ * Entered with the tree read lock held via read_lock_irq(); the lock is
+ * dropped around schedule() and re-taken before returning.  Always
+ * returns 0.
+ *
+ * NOTE(review): nothing in here pins 'state' across the sleep; the visible
+ * caller (wait_extent_bit) takes a reference first.
+ */
+static int wait_on_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ DEFINE_WAIT(wait);
+ /* queue ourselves before dropping the lock */
+ prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
+ read_unlock_irq(&tree->lock);
+ schedule();
+ read_lock_irq(&tree->lock);
+ finish_wait(&state->wq, &wait);
+ return 0;
+}
+
+/*
+ * waits for one or more bits to clear on a range in the state tree.
+ * The range [start, end] is inclusive.
+ * The tree lock is taken by this function (read side, via read_lock_irq)
+ * and is dropped while sleeping.  Always returns 0.
+ */
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+ struct extent_state *state;
+ struct rb_node *node;
+
+ read_lock_irq(&tree->lock);
+again:
+ while (1) {
+ /*
+ * this search will find all the extents that end after
+ * our range starts
+ */
+ node = tree_search(&tree->state, start);
+ if (!node)
+ break;
+
+ state = rb_entry(node, struct extent_state, rb_node);
+
+ if (state->start > end)
+ goto out;
+
+ if (state->state & bits) {
+ /*
+ * hold a reference across the sleep (wait_on_state drops
+ * the tree lock), then search again from this state's start
+ */
+ start = state->start;
+ atomic_inc(&state->refs);
+ wait_on_state(tree, state);
+ free_extent_state(state);
+ goto again;
+ }
+ /* none of the bits are set here, move past this state */
+ start = state->end + 1;
+
+ if (start > end)
+ break;
+
+ /* give up the lock briefly if a reschedule is pending */
+ if (need_resched()) {
+ read_unlock_irq(&tree->lock);
+ cond_resched();
+ read_lock_irq(&tree->lock);
+ }
+ }
+out:
+ read_unlock_irq(&tree->lock);
+ return 0;
+}
+EXPORT_SYMBOL(wait_extent_bit);
+
+/*
+ * OR 'bits' into an extent state.  If that turns EXTENT_DIRTY on for a
+ * state that was not dirty before, the state's size is added to
+ * tree->dirty_bytes.  No locking is done here.
+ */
+static void set_state_bits(struct extent_io_tree *tree,
+			   struct extent_state *state,
+			   int bits)
+{
+	const int newly_dirty = (bits & EXTENT_DIRTY) &&
+				!(state->state & EXTENT_DIRTY);
+
+	if (newly_dirty)
+		tree->dirty_bytes += state->end - state->start + 1;
+	state->state |= bits;
+}
+
+/*
+ * Set some bits on a range in the tree, inserting new states for holes and
+ * splitting states that straddle an edge of the range.
+ *
+ * The gfp mask indicates what allocation and sleeping is allowed.  When it
+ * allows sleeping (__GFP_WAIT) a spare extent_state is allocated up front,
+ * before the tree lock is taken.  When it does not, a spare is allocated
+ * with GFP_ATOMIC under the lock, and only if an insert or split is
+ * actually needed.
+ *
+ * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
+ * range already has the desired bits set.  The start of the existing
+ * range is returned in failed_start in this case.  failed_start is only
+ * written on that path.  Parts of the range processed before the conflict
+ * keep the bits that were set on them.
+ *
+ * [start, end] is inclusive.
+ * This takes the tree lock.  Returns 0 on success, -EEXIST as described
+ * above, or -ENOMEM if the up-front allocation fails.
+ */
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
+		   int exclusive, u64 *failed_start, gfp_t mask)
+{
+	struct extent_state *state;
+	struct extent_state *prealloc = NULL;
+	struct rb_node *node;
+	unsigned long flags;
+	int err = 0;
+	int set;
+	u64 last_start;
+again:
+	if (!prealloc && (mask & __GFP_WAIT)) {
+		prealloc = alloc_extent_state(mask);
+		if (!prealloc)
+			return -ENOMEM;
+	}
+
+	write_lock_irqsave(&tree->lock, flags);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node) {
+		/*
+		 * Nothing ends after 'start': cover the whole remaining
+		 * range with one new state.  Callers that may not sleep
+		 * arrive without a spare struct, so get one atomically
+		 * rather than handing NULL to insert_state().
+		 */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = insert_state(tree, prealloc, start, end, bits);
+		prealloc = NULL;
+		BUG_ON(err == -EEXIST);
+		goto out;
+	}
+
+	state = rb_entry(node, struct extent_state, rb_node);
+	last_start = state->start;
+
+	/*
+	 * | ---- desired range ---- |
+	 * | state |
+	 *
+	 * Just lock what we found and keep going
+	 */
+	if (state->start == start && state->end <= end) {
+		set = state->state & bits;
+		if (set && exclusive) {
+			*failed_start = state->start;
+			err = -EEXIST;
+			goto out;
+		}
+		set_state_bits(tree, state, bits);
+		start = state->end + 1;
+		merge_state(tree, state);
+		goto search_again;
+	}
+
+	/*
+	 *     | ---- desired range ---- |
+	 * | state |
+	 *   or
+	 * | ------------- state -------------- |
+	 *
+	 * We need to split the extent we found, and may flip bits on
+	 * second half.
+	 *
+	 * If the extent we found extends past our
+	 * range, we just split and search again.  It'll get split
+	 * again the next time through.
+	 *
+	 * If the extent we found is inside our range, we set the
+	 * desired bit on it.
+	 */
+	if (state->start < start) {
+		set = state->state & bits;
+		if (exclusive && set) {
+			*failed_start = start;
+			err = -EEXIST;
+			goto out;
+		}
+		/* atomic fallback for callers that could not preallocate */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = split_state(tree, state, prealloc, start);
+		BUG_ON(err == -EEXIST);
+		/* split_state() either inserted or freed the spare */
+		prealloc = NULL;
+		if (err)
+			goto out;
+		if (state->end <= end) {
+			set_state_bits(tree, state, bits);
+			start = state->end + 1;
+			merge_state(tree, state);
+		} else {
+			start = state->start;
+		}
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *     | state | or               | state |
+	 *
+	 * There's a hole, we need to insert something in it and
+	 * ignore the extent we found.
+	 */
+	if (state->start > start) {
+		u64 this_end;
+		/* fill only up to the found state, or to 'end' if sooner */
+		if (end < last_start)
+			this_end = end;
+		else
+			this_end = last_start - 1;
+		/* atomic fallback for callers that could not preallocate */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = insert_state(tree, prealloc, start, this_end,
+				   bits);
+		prealloc = NULL;
+		BUG_ON(err == -EEXIST);
+		if (err)
+			goto out;
+		start = this_end + 1;
+		goto search_again;
+	}
+	/*
+	 * | ---- desired range ---- |
+	 *                        | state |
+	 * We need to split the extent, and set the bit
+	 * on the first half
+	 */
+	if (state->start <= end && state->end > end) {
+		set = state->state & bits;
+		if (exclusive && set) {
+			*failed_start = start;
+			err = -EEXIST;
+			goto out;
+		}
+		/* atomic fallback for callers that could not preallocate */
+		if (!prealloc)
+			prealloc = alloc_extent_state(GFP_ATOMIC);
+		BUG_ON(!prealloc);
+		err = split_state(tree, state, prealloc, end + 1);
+		BUG_ON(err == -EEXIST);
+
+		/* after the split 'prealloc' is the half inside our range */
+		set_state_bits(tree, prealloc, bits);
+		merge_state(tree, prealloc);
+		prealloc = NULL;
+		goto out;
+	}
+
+	goto search_again;
+
+out:
+	write_unlock_irqrestore(&tree->lock, flags);
+	if (prealloc)
+		free_extent_state(prealloc);
+
+	return err;
+
+search_again:
+	if (start > end)
+		goto out;
+	/* drop the lock between passes so sleeping callers can reschedule */
+	write_unlock_irqrestore(&tree->lock, flags);
+	if (mask & __GFP_WAIT)
+		cond_resched();
+	goto again;
+}
+EXPORT_SYMBOL(set_extent_bit);
+
+/* wrappers around set/clear extent bit */
+/*
+ * Set EXTENT_DIRTY on the inclusive range [start, end].  Non-exclusive:
+ * parts of the range that are already dirty are not an error.
+ * Returns whatever set_extent_bit() returns.
+ */
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, EXTENT_DIRTY, exclusive,
+			      NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_dirty);
+
+/*
+ * Set an arbitrary set of 'bits' on the inclusive range [start, end],
+ * non-exclusively.  Returns whatever set_extent_bit() returns.
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		    int bits, gfp_t mask)
+{
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_bits);
+
+/*
+ * Clear an arbitrary set of 'bits' on the inclusive range [start, end]
+ * without waking waiters and without forcing removal of the states.
+ * Returns whatever clear_extent_bit() returns.
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+		      int bits, gfp_t mask)
+{
+	const int wake = 0;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, bits, wake, delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_bits);
+
+/*
+ * Set both EXTENT_DELALLOC and EXTENT_DIRTY on the inclusive range
+ * [start, end], non-exclusively.  Returns whatever set_extent_bit()
+ * returns.
+ */
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+			gfp_t mask)
+{
+	const int bits = EXTENT_DELALLOC | EXTENT_DIRTY;
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_delalloc);
+
+/*
+ * Clear both EXTENT_DIRTY and EXTENT_DELALLOC on the inclusive range
+ * [start, end], without waking waiters or forcing removal.
+ * Returns whatever clear_extent_bit() returns.
+ */
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	const int bits = EXTENT_DIRTY | EXTENT_DELALLOC;
+	const int wake = 0;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, bits, wake, delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_dirty);
+
+/*
+ * Set EXTENT_NEW on the inclusive range [start, end], non-exclusively.
+ * Returns whatever set_extent_bit() returns.
+ */
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+		   gfp_t mask)
+{
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, EXTENT_NEW, exclusive,
+			      NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_new);
+
+/*
+ * Clear EXTENT_NEW on the inclusive range [start, end], without waking
+ * waiters or forcing removal.  Returns whatever clear_extent_bit() returns.
+ */
+int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+		     gfp_t mask)
+{
+	const int wake = 0;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, EXTENT_NEW, wake, delete,
+				mask);
+}
+EXPORT_SYMBOL(clear_extent_new);
+
+/*
+ * Set EXTENT_UPTODATE on the inclusive range [start, end],
+ * non-exclusively.  Returns whatever set_extent_bit() returns.
+ */
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			gfp_t mask)
+{
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, exclusive,
+			      NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_uptodate);
+
+/*
+ * Clear EXTENT_UPTODATE on the inclusive range [start, end], without
+ * waking waiters or forcing removal.  Returns whatever clear_extent_bit()
+ * returns.
+ */
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+			  gfp_t mask)
+{
+	const int wake = 0;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, wake,
+				delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_uptodate);
+
+/*
+ * Set EXTENT_WRITEBACK on the inclusive range [start, end],
+ * non-exclusively.  Returns whatever set_extent_bit() returns.
+ */
+int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	const int exclusive = 0;
+
+	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, exclusive,
+			      NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_writeback);
+
+/*
+ * Clear EXTENT_WRITEBACK on the inclusive range [start, end] and wake
+ * anyone sleeping on the affected states.  Removal is not forced.
+ * Returns whatever clear_extent_bit() returns.
+ */
+int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
+			   gfp_t mask)
+{
+	const int wake = 1;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, wake,
+				delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_writeback);
+
+/*
+ * Sleep until EXTENT_WRITEBACK is clear on the inclusive range
+ * [start, end].  Returns whatever wait_extent_bit() returns.
+ */
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	const int bits = EXTENT_WRITEBACK;
+
+	return wait_extent_bit(tree, start, end, bits);
+}
+EXPORT_SYMBOL(wait_on_extent_writeback);
+
+/*
+ * Lock the inclusive range [start, end] by setting EXTENT_LOCKED on it
+ * exclusively, working in ascending order.  Whenever part of the range is
+ * already locked and 'mask' allows sleeping (__GFP_WAIT), wait for that
+ * part to be unlocked and retry from the point of conflict.
+ *
+ * Returns the result of the last set_extent_bit() call; that is -EEXIST
+ * when a conflict is hit and 'mask' does not allow sleeping.
+ */
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+	u64 failed_start;
+	int err;
+
+	for (;;) {
+		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
+				     &failed_start, mask);
+		/* done on success, on a real error, or if we may not sleep */
+		if (err != -EEXIST || !(mask & __GFP_WAIT))
+			break;
+
+		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+		start = failed_start;
+		WARN_ON(start > end);
+	}
+	return err;
+}
+EXPORT_SYMBOL(lock_extent);
+
+/*
+ * Clear EXTENT_LOCKED on the inclusive range [start, end] and wake anyone
+ * sleeping on the affected states.  Returns whatever clear_extent_bit()
+ * returns.
+ */
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+		  gfp_t mask)
+{
+	const int wake = 1;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, wake, delete,
+				mask);
+}
+EXPORT_SYMBOL(unlock_extent);
+
+/*
+ * Helper to dirty both the pages and the extent states for the inclusive
+ * byte range [start, end].  Every page in the range must already be in
+ * tree->mapping's page cache (BUG otherwise).  Always returns 0.
+ */
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	const unsigned long last = end >> PAGE_CACHE_SHIFT;
+	unsigned long idx;
+
+	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
+		struct page *page = find_get_page(tree->mapping, idx);
+
+		BUG_ON(!page);
+		__set_page_dirty_nobuffers(page);
+		/* drop the reference find_get_page() took */
+		page_cache_release(page);
+	}
+	set_extent_dirty(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(set_range_dirty);
+
+/*
+ * Helper to mark both the pages and the extent states writeback for the
+ * inclusive byte range [start, end].  Every page in the range must already
+ * be in tree->mapping's page cache (BUG otherwise).  Always returns 0.
+ */
+int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	const unsigned long last = end >> PAGE_CACHE_SHIFT;
+	unsigned long idx;
+
+	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
+		struct page *page = find_get_page(tree->mapping, idx);
+
+		BUG_ON(!page);
+		set_page_writeback(page);
+		/* drop the reference find_get_page() took */
+		page_cache_release(page);
+	}
+	set_extent_writeback(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(set_range_writeback);
+
+/*
+ * Find the first extent state ending at or after 'start' that has any of
+ * 'bits' set.  On a hit the state's range is stored in *start_ret and
+ * *end_ret and 0 is returned; otherwise 1 is returned and the outputs are
+ * left untouched.  Takes the tree read lock.
+ */
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+			  u64 *start_ret, u64 *end_ret, int bits)
+{
+	struct extent_state *state;
+	struct rb_node *node;
+	int ret = 1;
+
+	read_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (!node || IS_ERR(node))
+		goto out;
+
+	for (; node; node = rb_next(node)) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->end < start || !(state->state & bits))
+			continue;
+
+		*start_ret = state->start;
+		*end_ret = state->end;
+		ret = 0;
+		break;
+	}
+out:
+	read_unlock_irq(&tree->lock);
+	return ret;
+}
+EXPORT_SYMBOL(find_first_extent_bit);
+
+/*
+ * Find a run of EXTENT_DELALLOC states around *start and set
+ * EXTENT_LOCKED on each of them.
+ *
+ * On return *start holds the start of the first state that was locked and
+ * *end the end of the last one.  If the first state examined is not
+ * delalloc, nothing is locked and *end is set to that state's end.
+ * Whenever tree_search() comes back empty, *end is set to (u64)-1.
+ *
+ * After the first state is locked, the walk stops at a state that does
+ * not begin right where the previous one ended, at a state that is not
+ * delalloc, when the tree runs out, or once the bytes collected reach
+ * max_bytes.  A state that is already locked is waited on with the tree
+ * lock dropped, and the search is then restarted.
+ *
+ * Takes the tree write lock.  Returns the number of states locked.
+ */
+u64 find_lock_delalloc_range(struct extent_io_tree *tree,
+ u64 *start, u64 *end, u64 max_bytes)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+ u64 cur_start = *start;
+ u64 found = 0;
+ u64 total_bytes = 0;
+
+ write_lock_irq(&tree->lock);
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+search_again:
+ node = tree_search(&tree->state, cur_start);
+ if (!node || IS_ERR(node)) {
+ *end = (u64)-1;
+ goto out;
+ }
+
+ while(1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ /* once something is locked, only accept contiguous states */
+ if (found && state->start != cur_start) {
+ goto out;
+ }
+ if (!(state->state & EXTENT_DELALLOC)) {
+ if (!found)
+ *end = state->end;
+ goto out;
+ }
+ if (!found) {
+ /*
+ * first hit: walk backwards over preceding delalloc
+ * states so the run starts at the earliest one
+ */
+ struct extent_state *prev_state;
+ struct rb_node *prev_node = node;
+ while(1) {
+ prev_node = rb_prev(prev_node);
+ if (!prev_node)
+ break;
+ prev_state = rb_entry(prev_node,
+ struct extent_state,
+ rb_node);
+ if (!(prev_state->state & EXTENT_DELALLOC))
+ break;
+ state = prev_state;
+ node = prev_node;
+ }
+ }
+ if (state->state & EXTENT_LOCKED) {
+ /*
+ * someone else holds this state: pin it, sleep on its
+ * waitqueue with the tree lock dropped, then search again
+ */
+ DEFINE_WAIT(wait);
+ atomic_inc(&state->refs);
+ prepare_to_wait(&state->wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ write_unlock_irq(&tree->lock);
+ schedule();
+ write_lock_irq(&tree->lock);
+ finish_wait(&state->wq, &wait);
+ free_extent_state(state);
+ goto search_again;
+ }
+ state->state |= EXTENT_LOCKED;
+ if (!found)
+ *start = state->start;
+ found++;
+ *end = state->end;
+ cur_start = state->end + 1;
+ node = rb_next(node);
+ if (!node)
+ break;
+ /* 'state' is still the one just locked; count its bytes */
+ total_bytes += state->end - state->start + 1;
+ if (total_bytes >= max_bytes)
+ break;
+ }
+out:
+ write_unlock_irq(&tree->lock);
+ return found;
+}
+
+/*
+ * Count the bytes between *start and search_end (inclusive) that are
+ * covered by states with any of 'bits' set.  Counting stops once the total
+ * reaches max_bytes.
+ *
+ * *start is updated to the start of the first matching state, unless that
+ * very state already pushed the total to max_bytes.
+ *
+ * When *start is 0 and 'bits' is exactly EXTENT_DIRTY, the tree's running
+ * dirty_bytes counter is returned directly.
+ * NOTE(review): that shortcut ignores search_end and max_bytes and leaves
+ * *start untouched -- confirm callers expect this.
+ *
+ * Returns 0 (with a warning) if search_end <= *start.  Takes the tree
+ * write lock.
+ */
+u64 count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end, u64 max_bytes,
+ unsigned long bits)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+ u64 cur_start = *start;
+ u64 total_bytes = 0;
+ int found = 0;
+
+ if (search_end <= cur_start) {
+ printk("search_end %Lu start %Lu\n", search_end, cur_start);
+ WARN_ON(1);
+ return 0;
+ }
+
+ write_lock_irq(&tree->lock);
+ if (cur_start == 0 && bits == EXTENT_DIRTY) {
+ total_bytes = tree->dirty_bytes;
+ goto out;
+ }
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(&tree->state, cur_start);
+ if (!node || IS_ERR(node)) {
+ goto out;
+ }
+
+ while(1) {
+ state = rb_entry(node, struct extent_state, rb_node);
+ if (state->start > search_end)
+ break;
+ if (state->end >= cur_start && (state->state & bits)) {
+ /* only count the part of the state inside the window */
+ total_bytes += min(search_end, state->end) + 1 -
+ max(cur_start, state->start);
+ if (total_bytes >= max_bytes)
+ break;
+ if (!found) {
+ *start = state->start;
+ found = 1;
+ }
+ }
+ node = rb_next(node);
+ if (!node)
+ break;
+ }
+out:
+ write_unlock_irq(&tree->lock);
+ return total_bytes;
+}
+/*
+ * helper function to lock both pages and extents in the tree.
+ * pages must be locked first.
+ *
+ * Every page covering the inclusive byte range [start, end] is obtained
+ * with grab_cache_page(), then the extent range is locked with
+ * lock_extent().  Returns 0 on success.  If a page cannot be obtained,
+ * the pages handled so far are unlocked again and an error is returned
+ * (-ENOMEM, or the PTR_ERR value).
+ *
+ * NOTE(review): the page returned by grab_cache_page() is not released
+ * anywhere in this function, on either path -- presumably it stays
+ * referenced until a later unlock; confirm against callers.
+ */
+int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+ int err;
+
+ while (index <= end_index) {
+ page = grab_cache_page(tree->mapping, index);
+ if (!page) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto failed;
+ }
+ index++;
+ }
+ /* the return value of lock_extent() is not checked here */
+ lock_extent(tree, start, end, GFP_NOFS);
+ return 0;
+
+failed:
+ /*
+ * we failed above in getting the page at 'index', so we undo here
+ * up to but not including the page at 'index'
+ */
+ end_index = index;
+ index = start >> PAGE_CACHE_SHIFT;
+ while (index < end_index) {
+ /* NOTE(review): find_get_page() result is used without a NULL check */
+ page = find_get_page(tree->mapping, index);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ }
+ return err;
+}
+EXPORT_SYMBOL(lock_range);
+
+/*
+ * Helper to unlock both the pages and the extent states for the inclusive
+ * byte range [start, end].  Each page is looked up in tree->mapping,
+ * unlocked, and the lookup reference dropped; the extent range is then
+ * unlocked.  Always returns 0.
+ */
+int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
+{
+	const unsigned long last = end >> PAGE_CACHE_SHIFT;
+	unsigned long idx;
+
+	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
+		struct page *page = find_get_page(tree->mapping, idx);
+
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	unlock_extent(tree, start, end, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(unlock_range);
+
+/*
+ * Store 'private' on the extent state that begins exactly at 'start'.
+ * Returns 0 on success, or -ENOENT if the search finds no state or the
+ * state it finds does not begin at 'start'.  Takes the tree write lock.
+ */
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+	struct extent_state *state;
+	struct rb_node *node;
+	int ret = -ENOENT;
+
+	write_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (node && !IS_ERR(node)) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->start == start) {
+			state->private = private;
+			ret = 0;
+		}
+	}
+	write_unlock_irq(&tree->lock);
+	return ret;
+}
+
+/*
+ * Fetch the private value of the extent state that begins exactly at
+ * 'start' into *private.  Returns 0 on success, or -ENOENT (leaving
+ * *private untouched) if the search finds no state or the state it finds
+ * does not begin at 'start'.  Takes the tree read lock.
+ */
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+	struct extent_state *state;
+	struct rb_node *node;
+	int ret = -ENOENT;
+
+	read_lock_irq(&tree->lock);
+	/*
+	 * this search will find all the extents that end after
+	 * our range starts.
+	 */
+	node = tree_search(&tree->state, start);
+	if (node && !IS_ERR(node)) {
+		state = rb_entry(node, struct extent_state, rb_node);
+		if (state->start == start) {
+			*private = state->private;
+			ret = 0;
+		}
+	}
+	read_unlock_irq(&tree->lock);
+	return ret;
+}
+
+/*
+ * searches a range in the state tree for a given mask.
+ * If 'filled' == 1, this returns 1 only if every extent in the range
+ * has the bits set and the extents cover [start, end] without a hole.
+ * Otherwise, 1 is returned if any bit in the range is found set.
+ *
+ * The range [start, end] is inclusive.  Takes the tree read lock.
+ */
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int filled)
+{
+ struct extent_state *state = NULL;
+ struct rb_node *node;
+ int bitset = 0;
+ unsigned long flags;
+
+ read_lock_irqsave(&tree->lock, flags);
+ node = tree_search(&tree->state, start);
+ while (node && start <= end) {
+ state = rb_entry(node, struct extent_state, rb_node);
+
+ /* a gap before this state means the range is not filled */
+ if (filled && state->start > start) {
+ bitset = 0;
+ break;
+ }
+
+ if (state->start > end)
+ break;
+
+ if (state->state & bits) {
+ bitset = 1;
+ /* one hit is enough unless the whole range must match */
+ if (!filled)
+ break;
+ } else if (filled) {
+ bitset = 0;
+ break;
+ }
+ start = state->end + 1;
+ if (start > end)
+ break;
+ node = rb_next(node);
+ if (!node) {
+ /* ran out of states before reaching 'end' */
+ if (filled)
+ bitset = 0;
+ break;
+ }
+ }
+ read_unlock_irqrestore(&tree->lock, flags);
+ return bitset;
+}
+EXPORT_SYMBOL(test_range_bit);
+
+/*
+ * Helper: mark 'page' up to date if the whole byte range the page covers
+ * is EXTENT_UPTODATE in the tree.  Always returns 0.
+ */
+static int check_page_uptodate(struct extent_io_tree *tree,
+			       struct page *page)
+{
+	const u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
+	const u64 last = first + PAGE_CACHE_SIZE - 1;
+	const int all_uptodate = test_range_bit(tree, first, last,
+						EXTENT_UPTODATE, 1);
+
+	if (all_uptodate)
+		SetPageUptodate(page);
+	return 0;
+}
+
+/*
+ * Helper: unlock 'page' if no part of the byte range the page covers is
+ * EXTENT_LOCKED in the tree.  Always returns 0.
+ */
+static int check_page_locked(struct extent_io_tree *tree,
+			     struct page *page)
+{
+	const u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
+	const u64 last = first + PAGE_CACHE_SIZE - 1;
+	const int any_locked = test_range_bit(tree, first, last,
+					      EXTENT_LOCKED, 0);
+
+	if (!any_locked)
+		unlock_page(page);
+	return 0;
+}
+
+/*
+ * Helper: end writeback on 'page' if no part of the byte range the page
+ * covers is EXTENT_WRITEBACK in the tree.  Always returns 0.
+ */
+static int check_page_writeback(struct extent_io_tree *tree,
+				struct page *page)
+{
+	const u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
+	const u64 last = first + PAGE_CACHE_SIZE - 1;
+	const int any_writeback = test_range_bit(tree, first, last,
+						 EXTENT_WRITEBACK, 0);
+
+	if (!any_writeback)
+		end_page_writeback(page);
+	return 0;
+}
+
+/* lots and lots of room for performance fixes in the end_bio funcs */
+
+/*
+ * after a writepage IO is done, we need to:
+ * clear the uptodate bits on error
+ * clear the writeback bits in the extent tree for this IO
+ * end_page_writeback if the page has no more pending IO
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ *
+ * The extent_io_tree is taken from bio->bi_private.  The two prototypes
+ * below are selected by kernel version: up to 2.6.23 the handler takes an
+ * extra bytes_done argument and returns int.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_writepage(struct bio *bio, int err)
+#else
+static int end_bio_extent_writepage(struct bio *bio,
+ unsigned int bytes_done, int err)
+#endif
+{
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ /* the bio's vectors are walked from the last one back to the first */
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct extent_io_tree *tree = bio->bi_private;
+ u64 start;
+ u64 end;
+ int whole_page;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ /* presumably a partial completion: wait for the rest of the bio */
+ if (bio->bi_size)
+ return 1;
+#endif
+
+ do {
+ struct page *page = bvec->bv_page;
+ /* byte range in the file covered by this vector */
+ start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ bvec->bv_offset;
+ end = start + bvec->bv_len - 1;
+
+ if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+ whole_page = 1;
+ else
+ whole_page = 0;
+
+ /* step to the previous vector and prefetch its page flags */
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+
+ if (!uptodate) {
+ clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ clear_extent_writeback(tree, start, end, GFP_ATOMIC);
+
+ /* a partial page may still have other ranges under writeback */
+ if (whole_page)
+ end_page_writeback(page);
+ else
+ check_page_writeback(tree, page);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start, end);
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ return 0;
+#endif
+}
+
+/*
+ * after a readpage IO is done, we need to:
+ * clear the uptodate bits on error
+ * set the uptodate bits if things worked
+ * set the page up to date if all extents in the tree are uptodate
+ * clear the lock bit in the extent tree
+ * unlock the page if there are no other extents locked for it
+ *
+ * Scheduling is not allowed, so the extent state tree is expected
+ * to have one and only one object corresponding to this IO.
+ *
+ * The extent_io_tree is taken from bio->bi_private.  The two prototypes
+ * below are selected by kernel version: up to 2.6.23 the handler takes an
+ * extra bytes_done argument and returns int.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_readpage(struct bio *bio, int err)
+#else
+static int end_bio_extent_readpage(struct bio *bio,
+ unsigned int bytes_done, int err)
+#endif
+{
+ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ /* the bio's vectors are walked from the last one back to the first */
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct extent_io_tree *tree = bio->bi_private;
+ u64 start;
+ u64 end;
+ int whole_page;
+ int ret;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ /* presumably a partial completion: wait for the rest of the bio */
+ if (bio->bi_size)
+ return 1;
+#endif
+
+ do {
+ struct page *page = bvec->bv_page;
+ /* byte range in the file covered by this vector */
+ start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ bvec->bv_offset;
+ end = start + bvec->bv_len - 1;
+
+ if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
+ whole_page = 1;
+ else
+ whole_page = 0;
+
+ /* step to the previous vector and prefetch its page flags */
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+
+ /*
+ * a failing hook clears 'uptodate', which also stays cleared for
+ * the remaining vectors of this bio
+ */
+ if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
+ ret = tree->ops->readpage_end_io_hook(page, start, end);
+ if (ret)
+ uptodate = 0;
+ }
+ if (uptodate) {
+ set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+ if (whole_page)
+ SetPageUptodate(page);
+ else
+ check_page_uptodate(tree, page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+
+ unlock_extent(tree, start, end, GFP_ATOMIC);
+
+ /* a partial page may still have other ranges locked */
+ if (whole_page)
+ unlock_page(page);
+ else
+ check_page_locked(tree, page);
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ return 0;
+#endif
+}
+
+/*
+ * IO done from prepare_write is pretty simple, we just unlock
+ * the structs in the extent tree when done, and set the uptodate bits
+ * as appropriate.
+ *
+ * Unlike the readpage handler, the pages themselves are neither marked
+ * up to date nor unlocked here.  The extent_io_tree is taken from
+ * bio->bi_private.  The two prototypes below are selected by kernel
+ * version: up to 2.6.23 the handler takes an extra bytes_done argument
+ * and returns int.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
+static void end_bio_extent_preparewrite(struct bio *bio, int err)
+#else
+static int end_bio_extent_preparewrite(struct bio *bio,
+ unsigned int bytes_done, int err)
+#endif
+{
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ /* the bio's vectors are walked from the last one back to the first */
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct extent_io_tree *tree = bio->bi_private;
+ u64 start;
+ u64 end;
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ /* presumably a partial completion: wait for the rest of the bio */
+ if (bio->bi_size)
+ return 1;
+#endif
+
+ do {
+ struct page *page = bvec->bv_page;
+ /* byte range in the file covered by this vector */
+ start = ((u64)page->index << PAGE_CACHE_SHIFT) +
+ bvec->bv_offset;
+ end = start + bvec->bv_len - 1;
+
+ /* step to the previous vector and prefetch its page flags */
+ if (--bvec >= bio->bi_io_vec)
+ prefetchw(&bvec->bv_page->flags);
+
+ if (uptodate) {
+ set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+
+ unlock_extent(tree, start, end, GFP_ATOMIC);
+
+ } while (bvec >= bio->bi_io_vec);
+
+ bio_put(bio);
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
+ return 0;
+#endif
+}
+
+/*
+ * Allocate a bio with room for 'nr_vecs' vectors and point it at
+ * 'first_sector' on 'bdev'.
+ *
+ * If the allocation fails while the current task has PF_MEMALLOC set,
+ * retry with the vector count halved each time until an allocation
+ * succeeds or the count reaches zero.  Returns the bio, or NULL if every
+ * attempt failed.
+ */
+static struct bio *
+extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		 gfp_t gfp_flags)
+{
+	struct bio *bio = bio_alloc(gfp_flags, nr_vecs);
+
+	if (!bio && (current->flags & PF_MEMALLOC)) {
+		for (nr_vecs /= 2; nr_vecs; nr_vecs /= 2) {
+			bio = bio_alloc(gfp_flags, nr_vecs);
+			if (bio)
+				break;
+		}
+	}
+	if (!bio)
+		return NULL;
+
+	bio->bi_bdev = bdev;
+	bio->bi_sector = first_sector;
+	return bio;
+}
+
+/*
+ * Submit 'bio' for 'rw' IO.  An extra reference is held across
+ * submit_bio() so the bio's flags can be inspected afterwards.
+ *
+ * A warning is printed if the bio starts beyond the size of the block
+ * device's inode; the bio is submitted regardless.
+ *
+ * Returns -EOPNOTSUPP if the bio comes back flagged BIO_EOPNOTSUPP,
+ * 0 otherwise.
+ */
+static int submit_one_bio(int rw, struct bio *bio)
+{
+	u64 maxsector;
+	int ret;
+
+	bio_get(bio);
+
+	/* device size in 512 byte sectors */
+	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+	if (bio->bi_sector > maxsector) {
+		printk("sector too large max %Lu got %llu\n", maxsector,
+			(unsigned long long)bio->bi_sector);
+		WARN_ON(1);
+	}
+
+	submit_bio(rw, bio);
+	ret = bio_flagged(bio, BIO_EOPNOTSUPP) ? -EOPNOTSUPP : 0;
+	bio_put(bio);
+	return ret;
+}
+
+/*
+ * Queue 'size' bytes of 'page', starting at 'offset' within the page, for
+ * 'rw' IO at disk 'sector' on 'bdev'.
+ *
+ * If 'bio_ret' is non-NULL and already points at a bio, the page is added
+ * to that bio when it continues right where the bio ends on disk and
+ * bio_add_page() accepts all of it.  Otherwise that bio is submitted and a
+ * new one is started.  A new bio holds at most min(max_pages, device
+ * limit) vectors and completes through 'end_io_func' with 'tree' as its
+ * private data.  The new bio is handed back through *bio_ret when bio_ret
+ * is non-NULL, and submitted right away when it is NULL.
+ *
+ * Returns 0 on success, the result of submit_one_bio() when a bio had to
+ * be submitted, or -ENOMEM when no new bio could be allocated.  In the
+ * -ENOMEM case *bio_ret (if given) is set to NULL, because any bio it
+ * pointed at has already been submitted.
+ *
+ * NOTE(review): on -ENOMEM no IO is queued for this page, so its end_io
+ * handler will never run for this range -- confirm callers unwind page
+ * and extent locks themselves.
+ */
+static int submit_extent_page(int rw, struct extent_io_tree *tree,
+			      struct page *page, sector_t sector,
+			      size_t size, unsigned long offset,
+			      struct block_device *bdev,
+			      struct bio **bio_ret,
+			      unsigned long max_pages,
+			      bio_end_io_t end_io_func)
+{
+	int ret = 0;
+	struct bio *bio;
+	int nr;
+
+	if (bio_ret && *bio_ret) {
+		bio = *bio_ret;
+		/* not contiguous on disk, or the bio is full: flush it */
+		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
+		    bio_add_page(bio, page, size, offset) < size) {
+			ret = submit_one_bio(rw, bio);
+			bio = NULL;
+		} else {
+			return 0;
+		}
+	}
+	nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
+	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	if (!bio) {
+		printk("failed to allocate bio nr %d\n", nr);
+		/*
+		 * Don't touch the NULL bio, and don't leave the caller
+		 * holding a pointer to the bio that was submitted above.
+		 */
+		if (bio_ret)
+			*bio_ret = NULL;
+		return -ENOMEM;
+	}
+	bio_add_page(bio, page, size, offset);
+	bio->bi_end_io = end_io_func;
+	bio->bi_private = tree;
+	if (bio_ret) {
+		*bio_ret = bio;
+	} else {
+		ret = submit_one_bio(rw, bio);
+	}
+
+	return ret;
+}
+
+/*
+ * Mark 'page' as tracked by the extent code: set PagePrivate, store
+ * EXTENT_PAGE_PRIVATE in page->private and take a page reference.
+ * Does nothing if the page already has PagePrivate set.
+ */
+void set_page_extent_mapped(struct page *page)
+{
+	if (PagePrivate(page))
+		return;
+
+	SetPagePrivate(page);
+	WARN_ON(!page->mapping->a_ops->invalidatepage);
+	set_page_private(page, EXTENT_PAGE_PRIVATE);
+	page_cache_get(page);
+}
+
+/*
+ * Store EXTENT_PAGE_PRIVATE_FIRST_PAGE in page->private, combined with
+ * 'len' shifted left by two bits.
+ */
+void set_page_extent_head(struct page *page, unsigned long len)
+{
+	const unsigned long shifted_len = len << 2;
+
+	set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | shifted_len);
+}
+
+/*
+ * basic readpage implementation.  Locked extent state structs are inserted
+ * into the tree that are removed when the IO is done (by the end_io
+ * handlers)
+ *
+ * The page is walked one extent at a time, using get_extent() to map each
+ * file offset.  Ranges at or past i_size and holes are zero filled, marked
+ * uptodate and unlocked here; ranges that are already uptodate are just
+ * unlocked; everything else is queued for read through
+ * submit_extent_page(), collecting into *bio.
+ *
+ * If nothing had to be read the page is unlocked before returning, and
+ * marked uptodate unless an error was flagged on it.  Always returns 0;
+ * failures are reported through SetPageError().
+ */
+static int __extent_read_full_page(struct extent_io_tree *tree,
+ struct page *page,
+ get_extent_t *get_extent,
+ struct bio **bio)
+{
+ struct inode *inode = page->mapping->host;
+ u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ u64 end;
+ u64 cur = start;
+ u64 extent_offset;
+ u64 last_byte = i_size_read(inode);
+ u64 block_start;
+ u64 cur_end;
+ sector_t sector;
+ struct extent_map *em;
+ struct block_device *bdev;
+ int ret;
+ /* counts the ranges that went down the read submission path */
+ int nr = 0;
+ size_t page_offset = 0;
+ size_t iosize;
+ size_t blocksize = inode->i_sb->s_blocksize;
+
+ set_page_extent_mapped(page);
+
+ end = page_end;
+ lock_extent(tree, start, end, GFP_NOFS);
+
+ while (cur <= end) {
+ /* at or past i_size: zero the rest of the page and stop */
+ if (cur >= last_byte) {
+ char *userpage;
+ iosize = PAGE_CACHE_SIZE - page_offset;
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + page_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+ set_extent_uptodate(tree, cur, cur + iosize - 1,
+ GFP_NOFS);
+ unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+ break;
+ }
+ em = get_extent(inode, page, page_offset, cur,
+ end - cur + 1, 0);
+ if (IS_ERR(em) || !em) {
+ SetPageError(page);
+ unlock_extent(tree, cur, end, GFP_NOFS);
+ break;
+ }
+
+ extent_offset = cur - em->start;
+ BUG_ON(extent_map_end(em) <= cur);
+ BUG_ON(end < cur);
+
+ /* clamp to the extent and the page, then round up to a block */
+ iosize = min(extent_map_end(em) - cur, end - cur + 1);
+ cur_end = min(extent_map_end(em) - 1, end);
+ iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+ sector = (em->block_start + extent_offset) >> 9;
+ bdev = em->bdev;
+ block_start = em->block_start;
+ /* everything needed was copied out above, drop the map */
+ free_extent_map(em);
+ em = NULL;
+
+ /* we've found a hole, just zero and go on */
+ if (block_start == EXTENT_MAP_HOLE) {
+ char *userpage;
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + page_offset, 0, iosize);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+
+ set_extent_uptodate(tree, cur, cur + iosize - 1,
+ GFP_NOFS);
+ unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+ cur = cur + iosize;
+ page_offset += iosize;
+ continue;
+ }
+ /* the get_extent function already copied into the page */
+ if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+ unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+ cur = cur + iosize;
+ page_offset += iosize;
+ continue;
+ }
+
+ ret = 0;
+ if (tree->ops && tree->ops->readpage_io_hook) {
+ ret = tree->ops->readpage_io_hook(page, cur,
+ cur + iosize - 1);
+ }
+ if (!ret) {
+ /*
+ * this 'nr' shadows the outer counter: it is the number of
+ * pages from this one through the page holding i_size,
+ * passed as max_pages for a newly allocated bio
+ */
+ unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ nr -= page->index;
+ ret = submit_extent_page(READ, tree, page,
+ sector, iosize, page_offset,
+ bdev, bio, nr,
+ end_bio_extent_readpage);
+ }
+ if (ret)
+ SetPageError(page);
+ cur = cur + iosize;
+ page_offset += iosize;
+ /* the outer counter, bumped even when the hook or submit failed */
+ nr++;
+ }
+ /* nothing went down the read path, so finish the page here */
+ if (!nr) {
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ return 0;
+}
+
+/*
+ * Read a single page through the extent io tree.
+ *
+ * The real work is done by __extent_read_full_page(); any bio it hands
+ * back through its last argument is submitted here before returning.
+ * Returns whatever __extent_read_full_page() returned.
+ */
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+			  get_extent_t *get_extent)
+{
+	struct bio *pending = NULL;
+	int err = __extent_read_full_page(tree, page, get_extent, &pending);
+
+	if (!pending)
+		return err;
+
+	submit_one_bio(READ, pending);
+	return err;
+}
+EXPORT_SYMBOL(extent_read_full_page);
+
+/*
+ * the writepage semantics are similar to regular writepage. extent
+ * records are inserted to lock ranges in the tree, and as dirty areas
+ * are found, they are marked writeback. Then the lock bits are removed
+ * and the end_io handler clears the writeback ranges
+ *
+ * @page is expected to be locked on entry (a warning is printed if it
+ * is not) and is unlocked before returning. @data is a struct
+ * extent_page_data supplying the io tree, the get_extent callback and
+ * the bio pointer handed to submit_extent_page(). @wbc is not used here.
+ *
+ * Always returns 0; failures are only recorded with SetPageError().
+ */
+static int __extent_writepage(struct page *page, struct writeback_control *wbc,
+ void *data)
+{
+ struct inode *inode = page->mapping->host;
+ struct extent_page_data *epd = data;
+ struct extent_io_tree *tree = epd->tree;
+ u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 delalloc_start;
+ u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ u64 end;
+ u64 cur = start;
+ u64 extent_offset;
+ u64 last_byte = i_size_read(inode);
+ u64 block_start;
+ u64 iosize;
+ sector_t sector;
+ struct extent_map *em;
+ struct block_device *bdev;
+ int ret;
+ int nr = 0;
+ size_t page_offset = 0;
+ size_t blocksize;
+ loff_t i_size = i_size_read(inode);
+ unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+ u64 nr_delalloc;
+ u64 delalloc_end;
+
+ WARN_ON(!PageLocked(page));
+ /* the whole page is past i_size: drop its dirty bits and bail out */
+ if (page->index > end_index) {
+ clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ unlock_page(page);
+ return 0;
+ }
+
+ /* the page contains i_size: zero everything from i_size to page end */
+ if (page->index == end_index) {
+ char *userpage;
+
+ size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
+
+ userpage = kmap_atomic(page, KM_USER0);
+ memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
+ flush_dcache_page(page);
+ kunmap_atomic(userpage, KM_USER0);
+ }
+
+ set_page_extent_mapped(page);
+
+ /*
+ * process delalloc ranges until one ends at or past the end of this
+ * page: every range reported by find_lock_delalloc_range() is passed
+ * to the fill_delalloc hook and then has its LOCKED and DELALLOC bits
+ * cleared. NOTE(review): tree->ops is dereferenced without the NULL
+ * check used for writepage_io_hook below.
+ */
+ delalloc_start = start;
+ delalloc_end = 0;
+ while(delalloc_end < page_end) {
+ nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
+ &delalloc_end,
+ 128 * 1024 * 1024);
+ if (nr_delalloc == 0) {
+ delalloc_start = delalloc_end + 1;
+ continue;
+ }
+ tree->ops->fill_delalloc(inode, delalloc_start,
+ delalloc_end);
+ clear_extent_bit(tree, delalloc_start,
+ delalloc_end,
+ EXTENT_LOCKED | EXTENT_DELALLOC,
+ 1, 0, GFP_NOFS);
+ delalloc_start = delalloc_end + 1;
+ }
+ /* from here on the page range stays locked until the "done" label */
+ lock_extent(tree, start, page_end, GFP_NOFS);
+
+ end = page_end;
+ if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
+ printk("found delalloc bits after lock_extent\n");
+ }
+
+ /* i_size is at or before the start of this page: nothing to write */
+ if (last_byte <= start) {
+ clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ goto done;
+ }
+
+ set_extent_uptodate(tree, start, page_end, GFP_NOFS);
+ blocksize = inode->i_sb->s_blocksize;
+
+ /* walk the page one extent mapping at a time */
+ while (cur <= end) {
+ /* the rest of the page is past i_size */
+ if (cur >= last_byte) {
+ clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+ break;
+ }
+ em = epd->get_extent(inode, page, page_offset, cur,
+ end - cur + 1, 1);
+ if (IS_ERR(em) || !em) {
+ SetPageError(page);
+ break;
+ }
+
+ extent_offset = cur - em->start;
+ BUG_ON(extent_map_end(em) <= cur);
+ BUG_ON(end < cur);
+ /* clamp the io to this mapping and page, rounded up to a block */
+ iosize = min(extent_map_end(em) - cur, end - cur + 1);
+ iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
+ sector = (em->block_start + extent_offset) >> 9;
+ bdev = em->bdev;
+ block_start = em->block_start;
+ free_extent_map(em);
+ em = NULL;
+
+ /* holes and inline extents are not submitted as block io */
+ if (block_start == EXTENT_MAP_HOLE ||
+ block_start == EXTENT_MAP_INLINE) {
+ clear_extent_dirty(tree, cur,
+ cur + iosize - 1, GFP_NOFS);
+ cur = cur + iosize;
+ page_offset += iosize;
+ continue;
+ }
+
+ /* leave this out until we have a page_mkwrite call */
+ if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
+ EXTENT_DIRTY, 0)) {
+ cur = cur + iosize;
+ page_offset += iosize;
+ continue;
+ }
+ clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
+ /* the optional hook can veto the write by returning non-zero */
+ if (tree->ops && tree->ops->writepage_io_hook) {
+ ret = tree->ops->writepage_io_hook(page, cur,
+ cur + iosize - 1);
+ } else {
+ ret = 0;
+ }
+ if (ret)
+ SetPageError(page);
+ else {
+ unsigned long max_nr = end_index + 1;
+ set_range_writeback(tree, cur, cur + iosize - 1);
+ if (!PageWriteback(page)) {
+ printk("warning page %lu not writeback, "
+ "cur %llu end %llu\n", page->index,
+ (unsigned long long)cur,
+ (unsigned long long)end);
+ }
+
+ ret = submit_extent_page(WRITE, tree, page, sector,
+ iosize, page_offset, bdev,
+ &epd->bio, max_nr,
+ end_bio_extent_writepage);
+ if (ret)
+ SetPageError(page);
+ }
+ cur = cur + iosize;
+ page_offset += iosize;
+ /* nr counts ranges that reached the hook/submit step */
+ nr++;
+ }
+done:
+ if (nr == 0) {
+ /* make sure the mapping tag for page dirty gets cleared */
+ set_page_writeback(page);
+ end_page_writeback(page);
+ }
+ unlock_extent(tree, start, page_end, GFP_NOFS);
+ unlock_page(page);
+ return 0;
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+
+/* Taken directly from 2.6.23 for 2.6.18 back port */
+typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
+ void *data);
+
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space
+ * and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * If a page is already under I/O, write_cache_pages() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them. If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Returns 0 if nothing was written, otherwise the value of the last
+ * @writepage call made (with AOP_WRITEPAGE_ACTIVATE mapped to 0).
+ */
+static int write_cache_pages(struct address_space *mapping,
+ struct writeback_control *wbc, writepage_t writepage,
+ void *data)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ int ret = 0;
+ int done = 0;
+ struct pagevec pvec;
+ int nr_pages;
+ pgoff_t index;
+ pgoff_t end; /* Inclusive */
+ int scanned = 0;
+ int range_whole = 0;
+
+ /* non-blocking callers give up right away on a congested device */
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ return 0;
+ }
+
+ pagevec_init(&pvec, 0);
+ if (wbc->range_cyclic) {
+ index = mapping->writeback_index; /* Start from prev offset */
+ end = -1;
+ } else {
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+ /* an explicit range is never wrapped back to index 0 */
+ scanned = 1;
+ }
+retry:
+ while (!done && (index <= end) &&
+ (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+ unsigned i;
+
+ scanned = 1;
+ /*
+ * note that setting done below does not leave this for loop:
+ * the remaining pages of the current pagevec are still visited
+ */
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ /*
+ * At this point we hold neither mapping->tree_lock nor
+ * lock on the page itself: the page may be truncated or
+ * invalidated (changing page->mapping to NULL), or even
+ * swizzled back from swapper_space to tmpfs file
+ * mapping
+ */
+ lock_page(page);
+
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ continue;
+ }
+
+ if (!wbc->range_cyclic && page->index > end) {
+ done = 1;
+ unlock_page(page);
+ continue;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ /* skip pages still under writeback or no longer dirty */
+ if (PageWriteback(page) ||
+ !clear_page_dirty_for_io(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ ret = (*writepage)(page, wbc, data);
+
+ /* in this case the page is unlocked here, not by writepage */
+ if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
+ unlock_page(page);
+ ret = 0;
+ }
+ if (ret || (--(wbc->nr_to_write) <= 0))
+ done = 1;
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ done = 1;
+ }
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ if (!scanned && !done) {
+ /*
+ * We hit the last page and there is more work to be done: wrap
+ * back to the start of the file
+ */
+ scanned = 1;
+ index = 0;
+ goto retry;
+ }
+ /* remember where to resume for the next cyclic pass */
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ mapping->writeback_index = index;
+ return ret;
+}
+#endif
+
+/*
+ * Write @page, then make a WB_SYNC_NONE pass over the dirty pages that
+ * follow it in the same mapping (at most 64 of them), so they can share
+ * the bio being built.  Whatever bio is left over is submitted at the end.
+ *
+ * Only the result of writing @page itself is returned; the result of the
+ * follow-up pass is ignored.
+ */
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+			   get_extent_t *get_extent,
+			   struct writeback_control *wbc)
+{
+	struct address_space *mapping = page->mapping;
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+	/* private control block for the pages after @page */
+	struct writeback_control wbc_writepages = {
+		.bdi		= wbc->bdi,
+		.sync_mode	= WB_SYNC_NONE,
+		.older_than_this = NULL,
+		.nr_to_write	= 64,
+		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
+		.range_end	= (loff_t)-1,
+	};
+	int page_ret;
+
+	page_ret = __extent_writepage(page, wbc, &epd);
+	write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
+
+	if (epd.bio)
+		submit_one_bio(WRITE, epd.bio);
+
+	return page_ret;
+}
+EXPORT_SYMBOL(extent_write_full_page);
+
+
+/*
+ * writepages implementation: run __extent_writepage() over the dirty
+ * pages of @mapping via write_cache_pages(), then submit the bio that is
+ * still pending, if any.  Returns the write_cache_pages() result.
+ */
+int extent_writepages(struct extent_io_tree *tree,
+		      struct address_space *mapping,
+		      get_extent_t *get_extent,
+		      struct writeback_control *wbc)
+{
+	struct extent_page_data epd = {
+		.bio = NULL,
+		.tree = tree,
+		.get_extent = get_extent,
+	};
+	int err;
+
+	err = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
+	if (!epd.bio)
+		return err;
+
+	submit_one_bio(WRITE, epd.bio);
+	return err;
+}
+EXPORT_SYMBOL(extent_writepages);
+
+/*
+ * readpages implementation.  Takes @nr_pages pages off the tail of
+ * @pages one by one, inserts each into the page cache of @mapping and,
+ * when the insertion succeeds, puts it on the LRU and starts reading it.
+ * The list reference on every page is dropped, and the bio left pending
+ * after the last page is submitted.  Always returns 0.
+ */
+int extent_readpages(struct extent_io_tree *tree,
+		     struct address_space *mapping,
+		     struct list_head *pages, unsigned nr_pages,
+		     get_extent_t get_extent)
+{
+	struct pagevec lru_pvec;
+	struct bio *pending = NULL;
+	unsigned left;
+
+	pagevec_init(&lru_pvec, 0);
+	for (left = nr_pages; left > 0; left--) {
+		struct page *page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+
+		/*
+		 * what we want to do here is call add_to_page_cache_lru,
+		 * but that isn't exported, so we reproduce it here
+		 */
+		if (add_to_page_cache(page, mapping, page->index,
+				      GFP_KERNEL)) {
+			/* insertion failed: just drop the list reference */
+			page_cache_release(page);
+			continue;
+		}
+
+		/* open coding of lru_cache_add, also not exported */
+		page_cache_get(page);
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+
+		__extent_read_full_page(tree, page, get_extent, &pending);
+		page_cache_release(page);
+	}
+
+	if (pagevec_count(&lru_pvec))
+		__pagevec_lru_add(&lru_pvec);
+	BUG_ON(!list_empty(pages));
+
+	if (pending)
+		submit_one_bio(READ, pending);
+	return 0;
+}
+EXPORT_SYMBOL(extent_readpages);
+
+/*
+ * basic invalidatepage code.  @offset is rounded up to the next block
+ * boundary; from there to the end of the page the range is locked, any
+ * writeback on it is waited for, and the LOCKED, DIRTY and DELALLOC bits
+ * are then cleared from the tree.  Nothing is done when the rounded
+ * offset lies beyond the page.  Always returns 0.
+ */
+int extent_invalidatepage(struct extent_io_tree *tree,
+			  struct page *page, unsigned long offset)
+{
+	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
+	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 inval_start;
+
+	inval_start = page_start +
+		      ((offset + blocksize - 1) & ~(blocksize - 1));
+	if (inval_start > page_end)
+		return 0;
+
+	lock_extent(tree, inval_start, page_end, GFP_NOFS);
+	wait_on_extent_writeback(tree, inval_start, page_end);
+	clear_extent_bit(tree, inval_start, page_end,
+			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+			 1, 1, GFP_NOFS);
+	return 0;
+}
+EXPORT_SYMBOL(extent_invalidatepage);
+
+/*
+ * simple commit_write call: the page is flagged as extent mapped and
+ * dirtied, and i_size is pushed out when the write ended past the
+ * current end of file.  @tree and @from are not used.  Always returns 0.
+ */
+int extent_commit_write(struct extent_io_tree *tree,
+			struct inode *inode, struct page *page,
+			unsigned from, unsigned to)
+{
+	loff_t write_end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	set_page_extent_mapped(page);
+	set_page_dirty(page);
+
+	if (write_end <= inode->i_size)
+		return 0;
+
+	/* the file grew */
+	i_size_write(inode, write_end);
+	mark_inode_dirty(inode);
+	return 0;
+}
+EXPORT_SYMBOL(extent_commit_write);
+
+/*
+ * Get @page ready for a write covering bytes [@from, @to) of the page.
+ *
+ * The page range is locked in @tree and the blocks touched by the write
+ * are walked one extent mapping at a time:
+ *  - newly allocated blocks that the write covers only partly get the
+ *    uncovered bytes zeroed;
+ *  - existing on-disk blocks that are covered only partly and are not
+ *    uptodate yet are read in;
+ *  - everything else is marked uptodate and unlocked right away.
+ * Reads that were submitted are waited for before returning.
+ *
+ * Returns 0 on success, or a non-zero error code when a mapping lookup
+ * or a read submission failed.  On a lookup failure the part of the
+ * block range that was not processed yet is unlocked again, so later
+ * users of the range do not block on it forever.
+ */
+int extent_prepare_write(struct extent_io_tree *tree,
+			 struct inode *inode, struct page *page,
+			 unsigned from, unsigned to, get_extent_t *get_extent)
+{
+	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+	u64 block_start;
+	u64 orig_block_start;
+	u64 block_end;
+	u64 cur_end;
+	struct extent_map *em;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	size_t page_offset = 0;
+	size_t block_off_start;
+	size_t block_off_end;
+	int err = 0;
+	int iocount = 0;
+	int ret = 0;
+	int isnew;
+
+	set_page_extent_mapped(page);
+
+	/* widen [from, to) to whole blocks */
+	block_start = (page_start + from) & ~((u64)blocksize - 1);
+	block_end = (page_start + to - 1) | (blocksize - 1);
+	orig_block_start = block_start;
+
+	lock_extent(tree, page_start, page_end, GFP_NOFS);
+	while (block_start <= block_end) {
+		em = get_extent(inode, page, page_offset, block_start,
+				block_end - block_start + 1, 1);
+		if (IS_ERR(em) || !em) {
+			/*
+			 * report the failure instead of returning success,
+			 * and give back the lock on what is left
+			 */
+			err = em ? PTR_ERR(em) : -EIO;
+			unlock_extent(tree, block_start, block_end, GFP_NOFS);
+			goto err;
+		}
+		cur_end = min(block_end, extent_map_end(em) - 1);
+		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
+		block_off_end = block_off_start + blocksize;
+		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
+
+		/* new block, partly written: zero the bytes around the write */
+		if (!PageUptodate(page) && isnew &&
+		    (block_off_end > to || block_off_start < from)) {
+			void *kaddr;
+
+			kaddr = kmap_atomic(page, KM_USER0);
+			if (block_off_end > to)
+				memset(kaddr + to, 0, block_off_end - to);
+			if (block_off_start < from)
+				memset(kaddr + block_off_start, 0,
+				       from - block_off_start);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+		}
+		if ((em->block_start != EXTENT_MAP_HOLE &&
+		     em->block_start != EXTENT_MAP_INLINE) &&
+		    !isnew && !PageUptodate(page) &&
+		    (block_off_end > to || block_off_start < from) &&
+		    !test_range_bit(tree, block_start, cur_end,
+				    EXTENT_UPTODATE, 1)) {
+			u64 sector;
+			u64 extent_offset = block_start - em->start;
+			size_t iosize;
+			sector = (em->block_start + extent_offset) >> 9;
+			iosize = (cur_end - block_start + blocksize) &
+				~((u64)blocksize - 1);
+			/*
+			 * we've already got the extent locked, but we
+			 * need to split the state such that our end_bio
+			 * handler can clear the lock.
+			 */
+			set_extent_bit(tree, block_start,
+				       block_start + iosize - 1,
+				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+			ret = submit_extent_page(READ, tree, page,
+					 sector, iosize, page_offset, em->bdev,
+					 NULL, 1,
+					 end_bio_extent_preparewrite);
+			/* remember the first submission failure for the caller */
+			if (ret && !err)
+				err = ret;
+			iocount++;
+			block_start = block_start + iosize;
+		} else {
+			set_extent_uptodate(tree, block_start, cur_end,
+					    GFP_NOFS);
+			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+			block_start = cur_end + 1;
+		}
+		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
+		free_extent_map(em);
+	}
+	if (iocount) {
+		/* the read end_io handler drops the lock bit when done */
+		wait_extent_bit(tree, orig_block_start,
+				block_end, EXTENT_LOCKED);
+	}
+	check_page_uptodate(tree, page);
+err:
+	/* FIXME, zero out newly allocated blocks on error */
+	return err;
+}
+EXPORT_SYMBOL(extent_prepare_write);
+
+/*
+ * a helper for releasepage.  Every extent map record overlapping the
+ * page is looked up; a record is removed from @map unless some part of
+ * the range it covers is locked in @tree.  Afterwards, if nothing in the
+ * page range is locked, the UPTODATE state for the page is cleared.
+ *
+ * Returns 1 when the page range was not locked, 0 when it was.
+ */
+int try_release_extent_mapping(struct extent_map_tree *map,
+			       struct extent_io_tree *tree, struct page *page)
+{
+	struct extent_map *em;
+	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+	u64 orig_start = start;
+	int ret = 1;
+
+	while (start <= end) {
+		spin_lock(&map->lock);
+		/*
+		 * the extent map api takes [start, len], so pass the length
+		 * of what is left of the page rather than the end offset;
+		 * otherwise mappings beyond this page could be matched
+		 */
+		em = lookup_extent_mapping(map, start, end - start + 1);
+		if (!em || IS_ERR(em)) {
+			spin_unlock(&map->lock);
+			break;
+		}
+		if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
+				    EXTENT_LOCKED, 0)) {
+			remove_extent_mapping(map, em);
+			/* once for the rb tree */
+			free_extent_map(em);
+		}
+		/* em is still safe to read: the lookup reference is ours */
+		start = extent_map_end(em);
+		spin_unlock(&map->lock);
+
+		/* once for us */
+		free_extent_map(em);
+	}
+	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
+		ret = 0;
+	else
+		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
+				 1, 1, GFP_NOFS);
+	return ret;
+}
+EXPORT_SYMBOL(try_release_extent_mapping);
+
+/*
+ * bmap helper: translate file block @iblock of @mapping into a block
+ * number on disk using @get_extent.
+ *
+ * Returns 0 when the lookup fails or when the block lives in a hole or
+ * an inline extent.
+ */
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+		     get_extent_t *get_extent)
+{
+	struct inode *inode = mapping->host;
+	/* widen before shifting so a narrow sector_t cannot overflow */
+	u64 start = (u64)iblock << inode->i_blkbits;
+	sector_t sector = 0;
+	struct extent_map *em;
+
+	em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
+	if (!em || IS_ERR(em))
+		return 0;
+
+	if (em->block_start == EXTENT_MAP_INLINE ||
+	    em->block_start == EXTENT_MAP_HOLE)
+		goto out;
+
+	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
+out:
+	free_extent_map(em);
+	return sector;
+}
+
+/*
+ * Put @eb at the front of the buffer LRU of @tree.  A buffer that is
+ * already on the list is simply moved; a new one gets an extra reference
+ * for the list, and once the list has reached BUFFER_LRU_MAX entries the
+ * entry at the tail is taken off and its reference dropped.
+ * Always returns 0.
+ */
+static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb)
+{
+	struct extent_buffer *victim;
+
+	if (!list_empty(&eb->lru)) {
+		list_move(&eb->lru, &tree->buffer_lru);
+		return 0;
+	}
+
+	extent_buffer_get(eb);
+	list_add(&eb->lru, &tree->buffer_lru);
+	tree->lru_size++;
+	if (tree->lru_size < BUFFER_LRU_MAX)
+		return 0;
+
+	/* list is full: evict from the tail */
+	victim = list_entry(tree->buffer_lru.prev, struct extent_buffer, lru);
+	tree->lru_size--;
+	list_del_init(&victim->lru);
+	free_extent_buffer(victim);
+	return 0;
+}
+/*
+ * Search the buffer LRU of @tree for a buffer with exactly this @start
+ * and @len.  A match is returned with an extra reference taken; NULL is
+ * returned when there is none.
+ */
+static struct extent_buffer *find_lru(struct extent_io_tree *tree,
+				      u64 start, unsigned long len)
+{
+	struct list_head *head = &tree->buffer_lru;
+	struct list_head *pos;
+
+	for (pos = head->next; pos != head; pos = pos->next) {
+		struct extent_buffer *candidate;
+
+		candidate = list_entry(pos, struct extent_buffer, lru);
+		if (candidate->start != start || candidate->len != len)
+			continue;
+
+		extent_buffer_get(candidate);
+		return candidate;
+	}
+	return NULL;
+}
+
+/*
+ * Number of page cache pages touched by the byte range
+ * [@start, @start + @len).
+ */
+static inline unsigned long num_extent_pages(u64 start, u64 len)
+{
+	u64 first_index = start >> PAGE_CACHE_SHIFT;
+	u64 end_index = (start + len + PAGE_CACHE_SIZE - 1) >>
+			PAGE_CACHE_SHIFT;
+
+	return end_index - first_index;
+}
+
+/*
+ * Return page number @i of @eb.  Page 0 is cached in eb->first_page;
+ * any other page is looked up in the radix tree of the mapping that
+ * the first page belongs to, under the mapping's tree_lock.
+ */
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+					       unsigned long i)
+{
+	struct address_space *mapping;
+	unsigned long index;
+	struct page *found;
+
+	if (i == 0)
+		return eb->first_page;
+
+	/* translate the buffer relative number into a page cache index */
+	index = i + (eb->start >> PAGE_CACHE_SHIFT);
+	mapping = eb->first_page->mapping;
+
+	read_lock_irq(&mapping->tree_lock);
+	found = radix_tree_lookup(&mapping->page_tree, index);
+	read_unlock_irq(&mapping->tree_lock);
+
+	return found;
+}
+
+/*
+ * Get an extent_buffer for [@start, @start + @len).  A matching buffer
+ * on the LRU of @tree is reused (find_lru() takes a reference on it);
+ * otherwise a zeroed one is allocated with @mask and set up with a
+ * single reference.
+ *
+ * Returns NULL when the allocation fails.
+ */
+static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
+						   u64 start,
+						   unsigned long len,
+						   gfp_t mask)
+{
+	struct extent_buffer *eb = NULL;
+
+	spin_lock(&tree->lru_lock);
+	eb = find_lru(tree, start, len);
+	spin_unlock(&tree->lru_lock);
+	if (eb) {
+		return eb;
+	}
+
+	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	/* the allocation can fail; callers already cope with NULL */
+	if (!eb)
+		return NULL;
+
+	INIT_LIST_HEAD(&eb->lru);
+	eb->start = start;
+	eb->len = len;
+	atomic_set(&eb->refs, 1);
+
+	return eb;
+}
+
+/* Hand the memory of @eb back to the extent_buffer slab cache. */
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+	kmem_cache_free(extent_buffer_cache, eb);
+}
+
+/*
+ * Return an extent_buffer for [start, start + len), creating the page
+ * cache pages that back it when they do not exist yet.
+ *
+ * page0, when not NULL, is used as the first page of the buffer instead
+ * of looking one up; an extra reference is taken on it. The buffer is
+ * put on (or moved to the front of) the LRU of the tree before it is
+ * returned.
+ *
+ * Returns NULL when the buffer or one of its pages cannot be allocated.
+ */
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+ u64 start, unsigned long len,
+ struct page *page0,
+ gfp_t mask)
+{
+ unsigned long num_pages = num_extent_pages(start, len);
+ unsigned long i;
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ struct extent_buffer *eb;
+ struct page *p;
+ struct address_space *mapping = tree->mapping;
+ int uptodate = 1;
+
+ eb = __alloc_extent_buffer(tree, start, len, mask);
+ if (!eb || IS_ERR(eb))
+ return NULL;
+
+ /* buffer came from the LRU with its pages already attached */
+ if (eb->flags & EXTENT_BUFFER_FILLED)
+ goto lru_add;
+
+ if (page0) {
+ /* the caller supplied the first page, start the loop at page 1 */
+ eb->first_page = page0;
+ i = 1;
+ index++;
+ page_cache_get(page0);
+ mark_page_accessed(page0);
+ set_page_extent_mapped(page0);
+ WARN_ON(!PageUptodate(page0));
+ set_page_extent_head(page0, len);
+ } else {
+ i = 0;
+ }
+ for (; i < num_pages; i++, index++) {
+ p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+ if (!p) {
+ WARN_ON(1);
+ goto fail;
+ }
+ set_page_extent_mapped(p);
+ mark_page_accessed(p);
+ /* the first page is tagged as the head page and records len */
+ if (i == 0) {
+ eb->first_page = p;
+ set_page_extent_head(p, len);
+ } else {
+ set_page_private(p, EXTENT_PAGE_PRIVATE);
+ }
+ if (!PageUptodate(p))
+ uptodate = 0;
+ unlock_page(p);
+ }
+ /* uptodate stays 1 only if every page looked up here was uptodate */
+ if (uptodate)
+ eb->flags |= EXTENT_UPTODATE;
+ eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+ spin_lock(&tree->lru_lock);
+ add_lru(tree, eb);
+ spin_unlock(&tree->lru_lock);
+ return eb;
+
+fail:
+ spin_lock(&tree->lru_lock);
+ list_del_init(&eb->lru);
+ spin_unlock(&tree->lru_lock);
+ /* someone else still holds a reference: leave the buffer to them */
+ if (!atomic_dec_and_test(&eb->refs))
+ return NULL;
+ /* drop the page references taken so far; page 0 goes last */
+ for (index = 1; index < i; index++) {
+ page_cache_release(extent_buffer_page(eb, index));
+ }
+ if (i > 0)
+ page_cache_release(extent_buffer_page(eb, 0));
+ __free_extent_buffer(eb);
+ return NULL;
+}
+EXPORT_SYMBOL(alloc_extent_buffer);
+
+/*
+ * Like alloc_extent_buffer(), but only succeeds when every page backing
+ * [start, start + len) is already present in the page cache: pages are
+ * looked up with find_lock_page() and never created.
+ *
+ * Returns the buffer (placed on the LRU of the tree), or NULL when the
+ * buffer cannot be allocated or a page is missing.
+ */
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start, unsigned long len,
+ gfp_t mask)
+{
+ unsigned long num_pages = num_extent_pages(start, len);
+ unsigned long i;
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ struct extent_buffer *eb;
+ struct page *p;
+ struct address_space *mapping = tree->mapping;
+ int uptodate = 1;
+
+ eb = __alloc_extent_buffer(tree, start, len, mask);
+ if (!eb || IS_ERR(eb))
+ return NULL;
+
+ /* buffer came from the LRU with its pages already attached */
+ if (eb->flags & EXTENT_BUFFER_FILLED)
+ goto lru_add;
+
+ for (i = 0; i < num_pages; i++, index++) {
+ p = find_lock_page(mapping, index);
+ if (!p) {
+ goto fail;
+ }
+ set_page_extent_mapped(p);
+ mark_page_accessed(p);
+
+ /* the first page is tagged as the head page and records len */
+ if (i == 0) {
+ eb->first_page = p;
+ set_page_extent_head(p, len);
+ } else {
+ set_page_private(p, EXTENT_PAGE_PRIVATE);
+ }
+
+ if (!PageUptodate(p))
+ uptodate = 0;
+ unlock_page(p);
+ }
+ if (uptodate)
+ eb->flags |= EXTENT_UPTODATE;
+ eb->flags |= EXTENT_BUFFER_FILLED;
+
+lru_add:
+ spin_lock(&tree->lru_lock);
+ add_lru(tree, eb);
+ spin_unlock(&tree->lru_lock);
+ return eb;
+fail:
+ spin_lock(&tree->lru_lock);
+ list_del_init(&eb->lru);
+ spin_unlock(&tree->lru_lock);
+ /* someone else still holds a reference: leave the buffer to them */
+ if (!atomic_dec_and_test(&eb->refs))
+ return NULL;
+ /* drop the page references taken so far; page 0 goes last */
+ for (index = 1; index < i; index++) {
+ page_cache_release(extent_buffer_page(eb, index));
+ }
+ if (i > 0)
+ page_cache_release(extent_buffer_page(eb, 0));
+ __free_extent_buffer(eb);
+ return NULL;
+}
+EXPORT_SYMBOL(find_extent_buffer);
+
+/*
+ * Drop one reference on @eb (NULL is accepted).  When the last reference
+ * goes away the page references held by the buffer are released and the
+ * buffer itself is freed.
+ */
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	unsigned long nr;
+	unsigned long idx;
+
+	if (!eb || !atomic_dec_and_test(&eb->refs))
+		return;
+
+	WARN_ON(!list_empty(&eb->lru));
+	nr = num_extent_pages(eb->start, eb->len);
+
+	/*
+	 * the first page is released last: looking up the other pages
+	 * goes through eb->first_page
+	 */
+	idx = 1;
+	while (idx < nr) {
+		page_cache_release(extent_buffer_page(eb, idx));
+		idx++;
+	}
+	page_cache_release(extent_buffer_page(eb, 0));
+
+	__free_extent_buffer(eb);
+}
+EXPORT_SYMBOL(free_extent_buffer);
+
+/*
+ * Clear the dirty state of the buffer: first the DIRTY bits of its byte
+ * range in the tree, then the dirty flag and the radix tree dirty tag of
+ * each of its pages. A first or last page that the buffer covers only
+ * partly is left dirty while any other part of that page is still
+ * marked dirty in the tree.
+ *
+ * Always returns 0 (the result of clear_extent_dirty() is stored in
+ * "set" but not used).
+ */
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+ struct extent_buffer *eb)
+{
+ int set;
+ unsigned long i;
+ unsigned long num_pages;
+ struct page *page;
+
+ u64 start = eb->start;
+ u64 end = start + eb->len - 1;
+
+ set = clear_extent_dirty(tree, start, end, GFP_NOFS);
+ num_pages = num_extent_pages(eb->start, eb->len);
+
+ for (i = 0; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ lock_page(page);
+ /* refresh page->private while the page is locked */
+ if (i == 0)
+ set_page_extent_head(page, eb->len);
+ else
+ set_page_private(page, EXTENT_PAGE_PRIVATE);
+
+ /*
+ * if we're on the last page or the first page and the
+ * block isn't aligned on a page boundary, do extra checks
+ * to make sure we don't clean page that is partially dirty
+ */
+ if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
+ ((i == num_pages - 1) &&
+ ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
+ /* start/end are reused here for the range of this page */
+ start = (u64)page->index << PAGE_CACHE_SHIFT;
+ end = start + PAGE_CACHE_SIZE - 1;
+ if (test_range_bit(tree, start, end,
+ EXTENT_DIRTY, 0)) {
+ unlock_page(page);
+ continue;
+ }
+ }
+ clear_page_dirty_for_io(page);
+ /* clear the dirty tag unless the page was dirtied again meanwhile */
+ write_lock_irq(&page->mapping->tree_lock);
+ if (!PageDirty(page)) {
+ radix_tree_tag_clear(&page->mapping->page_tree,
+ page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ }
+ write_unlock_irq(&page->mapping->tree_lock);
+ unlock_page(page);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(clear_extent_buffer_dirty);
+
+/*
+ * Wait for writeback on the byte range covered by @eb; returns the
+ * result of wait_on_extent_writeback().
+ */
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+				    struct extent_buffer *eb)
+{
+	u64 first = eb->start;
+	u64 last = eb->start + eb->len - 1;
+
+	return wait_on_extent_writeback(tree, first, last);
+}
+EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
+
+/*
+ * Mark every page of the buffer dirty and then set the DIRTY bits for
+ * its byte range in the tree. Returns the set_extent_dirty() result.
+ */
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+ struct extent_buffer *eb)
+{
+ unsigned long i;
+ unsigned long num_pages;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = extent_buffer_page(eb, i);
+ /* writepage may need to do something special for the
+ * first page, we have to make sure page->private is
+ * properly set. releasepage may drop page->private
+ * on us if the page isn't already dirty.
+ */
+ if (i == 0) {
+ /* stays locked until the page has been dirtied below */
+ lock_page(page);
+ set_page_extent_head(page, eb->len);
+ } else if (PagePrivate(page) &&
+ page->private != EXTENT_PAGE_PRIVATE) {
+ /* private data holds some other value: set it up again */
+ lock_page(page);
+ set_page_extent_mapped(page);
+ unlock_page(page);
+ }
+ __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
+ if (i == 0)
+ unlock_page(page);
+ }
+ return set_extent_dirty(tree, eb->start,
+ eb->start + eb->len - 1, GFP_NOFS);
+}
+EXPORT_SYMBOL(set_extent_buffer_dirty);
+
+/*
+ * Mark the byte range of @eb uptodate in @tree and propagate that to its
+ * pages.  Pages fully covered by the buffer are flagged uptodate
+ * directly; a first or last page that is only partly covered is passed
+ * to check_page_uptodate() instead.  Always returns 0.
+ */
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+			       struct extent_buffer *eb)
+{
+	unsigned long nr = num_extent_pages(eb->start, eb->len);
+	u64 eb_end = eb->start + eb->len;
+	unsigned long idx;
+
+	set_extent_uptodate(tree, eb->start, eb_end - 1, GFP_NOFS);
+
+	for (idx = 0; idx < nr; idx++) {
+		struct page *page = extent_buffer_page(eb, idx);
+		int partial_head;
+		int partial_tail;
+
+		partial_head = idx == 0 &&
+			       (eb->start & (PAGE_CACHE_SIZE - 1));
+		partial_tail = idx == nr - 1 &&
+			       (eb_end & (PAGE_CACHE_SIZE - 1));
+
+		if (partial_head || partial_tail)
+			check_page_uptodate(tree, page);
+		else
+			SetPageUptodate(page);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(set_extent_buffer_uptodate);
+
+/*
+ * Tell whether @eb is uptodate: either the buffer carries the
+ * EXTENT_UPTODATE flag itself, or its whole byte range has the
+ * UPTODATE bit set in @tree.
+ */
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+			   struct extent_buffer *eb)
+{
+	u64 last = eb->start + eb->len - 1;
+
+	if (!(eb->flags & EXTENT_UPTODATE))
+		return test_range_bit(tree, eb->start, last,
+				      EXTENT_UPTODATE, 1);
+	return 1;
+}
+EXPORT_SYMBOL(extent_buffer_uptodate);
+
+/*
+ * Start reads for the pages of the buffer that are not uptodate, using
+ * the readpage operation of each page's mapping.
+ *
+ * start, when non-zero, is a byte offset inside the buffer's range; pages
+ * before the one containing it are skipped. With wait == 0 pages that
+ * cannot be locked immediately are skipped and the function returns
+ * right after submitting. With wait != 0 it blocks until all the pages
+ * are unlocked again and flags the buffer uptodate on success.
+ *
+ * Returns 0, the last error returned by readpage, or -EIO when a page is
+ * still not uptodate after waiting.
+ */
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+ struct extent_buffer *eb,
+ u64 start,
+ int wait)
+{
+ unsigned long i;
+ unsigned long start_i;
+ struct page *page;
+ int err;
+ int ret = 0;
+ unsigned long num_pages;
+
+ if (eb->flags & EXTENT_UPTODATE)
+ return 0;
+
+ /* disabled shortcut: the "0 &&" keeps this test from ever running */
+ if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+ EXTENT_UPTODATE, 1)) {
+ return 0;
+ }
+
+ if (start) {
+ WARN_ON(start < eb->start);
+ start_i = (start >> PAGE_CACHE_SHIFT) -
+ (eb->start >> PAGE_CACHE_SHIFT);
+ } else {
+ start_i = 0;
+ }
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = start_i; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ if (PageUptodate(page)) {
+ continue;
+ }
+ if (!wait) {
+ /* trylock only: never block when the caller won't wait */
+ if (TestSetPageLocked(page)) {
+ continue;
+ }
+ } else {
+ lock_page(page);
+ }
+ /* recheck now that the page lock is held */
+ if (!PageUptodate(page)) {
+ err = page->mapping->a_ops->readpage(NULL, page);
+ if (err) {
+ ret = err;
+ }
+ } else {
+ unlock_page(page);
+ }
+ }
+
+ if (ret || !wait) {
+ return ret;
+ }
+
+ for (i = start_i; i < num_pages; i++) {
+ page = extent_buffer_page(eb, i);
+ wait_on_page_locked(page);
+ if (!PageUptodate(page)) {
+ ret = -EIO;
+ }
+ }
+ if (!ret)
+ eb->flags |= EXTENT_UPTODATE;
+ return ret;
+}
+EXPORT_SYMBOL(read_extent_buffer_pages);
+
+/*
+ * Copy @len bytes out of @eb into @dstv.
+ *
+ * @start is a byte offset relative to the beginning of the buffer (not
+ * a disk or file offset).  The copy is done page by page through a
+ * temporary atomic mapping.  Reading from a page that is not uptodate,
+ * or outside the buffer, only triggers a warning.
+ */
+void read_extent_buffer(struct extent_buffer *eb, void *dstv,
+			unsigned long start,
+			unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	char *dst = (char *)dstv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	unsigned long num_pages = num_extent_pages(eb->start, eb->len);
+
+	WARN_ON(start > eb->len);
+	/* start is buffer relative, so the limit is eb->len alone */
+	WARN_ON(start + len > eb->len);
+
+	/* offset of the first byte inside the first page touched */
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+		if (!PageUptodate(page)) {
+			printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
+			WARN_ON(1);
+		}
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+		kaddr = kmap_atomic(page, KM_USER1);
+		memcpy(dst, kaddr + offset, cur);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		dst += cur;
+		len -= cur;
+		/* every following page is read from its beginning */
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(read_extent_buffer);
+
+/*
+ * Map the page of @eb that holds bytes [@start, @start + @min_len) with
+ * kmap_atomic() slot @km.
+ *
+ * On success *@token is the address to hand to unmap_extent_buffer(),
+ * *@map points at the first mapped byte that belongs to the buffer,
+ * *@map_start is the buffer relative offset of that byte and *@map_len
+ * the number of bytes from there to the end of the page.
+ *
+ * Returns -EINVAL when the requested bytes span more than one page,
+ * 0 otherwise.  A request past the end of the buffer is only warned
+ * about.
+ */
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
+			      unsigned long min_len, char **token, char **map,
+			      unsigned long *map_start,
+			      unsigned long *map_len, int km)
+{
+	size_t eb_page_off = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long first_i = (eb_page_off + start) >> PAGE_CACHE_SHIFT;
+	unsigned long last_i = (eb_page_off + start + min_len - 1) >>
+			       PAGE_CACHE_SHIFT;
+	size_t offset;
+	struct page *p;
+	char *kaddr;
+
+	if (first_i != last_i)
+		return -EINVAL;
+
+	if (first_i != 0) {
+		offset = 0;
+		*map_start = ((u64)first_i << PAGE_CACHE_SHIFT) - eb_page_off;
+	} else {
+		/* on the first page the buffer may start mid-page */
+		offset = eb_page_off;
+		*map_start = 0;
+	}
+
+	if (start + min_len > eb->len) {
+		printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
+		WARN_ON(1);
+	}
+
+	p = extent_buffer_page(eb, first_i);
+	WARN_ON(!PageUptodate(p));
+	kaddr = kmap_atomic(p, km);
+
+	*token = kaddr;
+	*map = kaddr + offset;
+	*map_len = PAGE_CACHE_SIZE - offset;
+	return 0;
+}
+EXPORT_SYMBOL(map_private_extent_buffer);
+
+/*
+ * Same as map_private_extent_buffer(), with handling of the mapping
+ * cached in @eb: if the buffer currently has a cached mapping it is
+ * unmapped first, and when the new mapping succeeds it replaces the
+ * cached one.  A buffer without a cached mapping is left without one.
+ */
+int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
+		      unsigned long min_len,
+		      char **token, char **map,
+		      unsigned long *map_start,
+		      unsigned long *map_len, int km)
+{
+	int had_cached = 0;
+	int err;
+
+	if (eb->map_token) {
+		had_cached = 1;
+		unmap_extent_buffer(eb, eb->map_token, km);
+		eb->map_token = NULL;
+	}
+
+	err = map_private_extent_buffer(eb, start, min_len, token, map,
+					map_start, map_len, km);
+	if (err || !had_cached)
+		return err;
+
+	/* remember the new mapping in place of the old one */
+	eb->map_token = *token;
+	eb->kaddr = *map;
+	eb->map_start = *map_start;
+	eb->map_len = *map_len;
+	return err;
+}
+EXPORT_SYMBOL(map_extent_buffer);
+
+/*
+ * Undo a mapping set up by map_private_extent_buffer(): @token is the
+ * address it returned and @km the kmap_atomic() slot.  @eb is unused.
+ */
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
+{
+	kunmap_atomic(token, km);
+}
+EXPORT_SYMBOL(unmap_extent_buffer);
+
+/*
+ * Compare @len bytes at @ptrv with the contents of @eb starting at the
+ * buffer relative offset @start.
+ *
+ * Returns the memcmp() result of the first page-sized piece that
+ * differs, or 0 when everything matches.
+ */
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+			 unsigned long start,
+			 unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	const char *ptr = ptrv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+	int ret = 0;
+
+	WARN_ON(start > eb->len);
+	/* start is buffer relative, so the limit is eb->len alone */
+	WARN_ON(start + len > eb->len);
+
+	/* offset of the first byte inside the first page touched */
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, (PAGE_CACHE_SIZE - offset));
+
+		kaddr = kmap_atomic(page, KM_USER0);
+		ret = memcmp(ptr, kaddr + offset, cur);
+		kunmap_atomic(kaddr, KM_USER0);
+		if (ret)
+			break;
+
+		ptr += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(memcmp_extent_buffer);
+
+/*
+ * Copy @len bytes from @srcv into @eb at the buffer relative offset
+ * @start, one page at a time through a temporary atomic mapping.
+ * Writing to a page that is not uptodate, or outside the buffer, only
+ * triggers a warning.
+ */
+void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
+			 unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	const char *src = srcv;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	/* start is buffer relative, so the limit is eb->len alone */
+	WARN_ON(start + len > eb->len);
+
+	/* offset of the first byte inside the first page touched */
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		kaddr = kmap_atomic(page, KM_USER1);
+		memcpy(kaddr + offset, src, cur);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		src += cur;
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(write_extent_buffer);
+
+/*
+ * Fill @len bytes of @eb with the byte @c, starting at the buffer
+ * relative offset @start, one page at a time through a temporary atomic
+ * mapping.  Touching a page that is not uptodate, or a range outside
+ * the buffer, only triggers a warning.
+ */
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+			  unsigned long start, unsigned long len)
+{
+	size_t cur;
+	size_t offset;
+	struct page *page;
+	char *kaddr;
+	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+
+	WARN_ON(start > eb->len);
+	/* start is buffer relative, so the limit is eb->len alone */
+	WARN_ON(start + len > eb->len);
+
+	/* offset of the first byte inside the first page touched */
+	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+	while (len > 0) {
+		page = extent_buffer_page(eb, i);
+		WARN_ON(!PageUptodate(page));
+
+		cur = min(len, PAGE_CACHE_SIZE - offset);
+		kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + offset, c, cur);
+		kunmap_atomic(kaddr, KM_USER0);
+
+		len -= cur;
+		offset = 0;
+		i++;
+	}
+}
+EXPORT_SYMBOL(memset_extent_buffer);
+
+/*
+ * Copy 'len' bytes from extent buffer 'src' (starting at 'src_offset')
+ * into extent buffer 'dst' (starting at 'dst_offset').
+ *
+ * The destination is walked page by page.  Each destination page is
+ * mapped with kmap_atomic() and filled through read_extent_buffer(), which
+ * is therefore called while that atomic mapping is still held.  A warning
+ * is emitted when the two buffers do not have the same length.
+ */
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+			unsigned long dst_offset, unsigned long src_offset,
+			unsigned long len)
+{
+	/* dst->start need not be page aligned: offset of dst in its page 0 */
+	size_t page_bias = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long page_idx = (page_bias + dst_offset) >> PAGE_CACHE_SHIFT;
+	size_t in_page = (page_bias + dst_offset) &
+			 ((unsigned long)PAGE_CACHE_SIZE - 1);
+	struct page *dst_page;
+	char *mapped;
+	size_t chunk;
+
+	WARN_ON(src->len != (u64)dst->len);
+
+	while (len > 0) {
+		dst_page = extent_buffer_page(dst, page_idx);
+		WARN_ON(!PageUptodate(dst_page));
+
+		/* stop at the end of the current destination page */
+		chunk = min(len, (unsigned long)(PAGE_CACHE_SIZE - in_page));
+
+		mapped = kmap_atomic(dst_page, KM_USER0);
+		read_extent_buffer(src, mapped + in_page, src_offset, chunk);
+		kunmap_atomic(mapped, KM_USER0);
+
+		len -= chunk;
+		src_offset += chunk;
+		page_idx++;
+		/* later pages are filled from their first byte */
+		in_page = 0;
+	}
+}
+EXPORT_SYMBOL(copy_extent_buffer);
+
+/*
+ * Move 'len' bytes from 'src_off' in 'src_page' to 'dst_off' in
+ * 'dst_page'.
+ *
+ * Within a single page the ranges may overlap, so memmove() is used.
+ * Across two different pages the bytes are copied one at a time from the
+ * last byte down to the first.
+ */
+static void move_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+
+	if (dst_page != src_page) {
+		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+		/* both cursors start one past the last byte of their range */
+		char *to = dst_kaddr + dst_off + len;
+		char *from = src_kaddr + src_off + len;
+		unsigned long remaining;
+
+		for (remaining = len; remaining > 0; remaining--)
+			*--to = *--from;
+
+		kunmap_atomic(src_kaddr, KM_USER1);
+	} else {
+		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+	}
+	kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+/*
+ * Copy 'len' bytes from 'src_off' in 'src_page' to 'dst_off' in
+ * 'dst_page'.
+ *
+ * Two different pages can never overlap, so memcpy() is used for them.
+ * When source and destination are the same page the two ranges may
+ * overlap (memmove_extent_buffer() hands overlapping forward moves to
+ * memcpy_extent_buffer(), which ends up here), and memcpy() on overlapping
+ * memory is undefined, so that case goes through memmove().
+ */
+static void copy_pages(struct page *dst_page, struct page *src_page,
+		       unsigned long dst_off, unsigned long src_off,
+		       unsigned long len)
+{
+	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+
+	if (dst_page != src_page) {
+		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+
+		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
+		kunmap_atomic(src_kaddr, KM_USER1);
+	} else {
+		/* same page: the ranges may overlap */
+		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
+	}
+	kunmap_atomic(dst_kaddr, KM_USER0);
+}
+
+/*
+ * Copy 'len' bytes inside one extent buffer, from 'src_offset' to
+ * 'dst_offset', walking both ranges forward from their first byte.
+ *
+ * Both ranges must lie inside the buffer; if either runs past dst->len a
+ * message is printed and BUG_ON() fires.  The copy is cut into chunks that
+ * cross neither a source nor a destination page boundary, and every chunk
+ * is handed to copy_pages().
+ */
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+			   unsigned long src_offset, unsigned long len)
+{
+	/* dst->start need not be page aligned: offset of the buffer in page 0 */
+	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+	unsigned long page_mask = (unsigned long)PAGE_CACHE_SIZE - 1;
+
+	if (src_offset + len > dst->len) {
+		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+		       src_offset, len, dst->len);
+		BUG_ON(1);
+	}
+	if (dst_offset + len > dst->len) {
+		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+		       dst_offset, len, dst->len);
+		BUG_ON(1);
+	}
+
+	while (len > 0) {
+		unsigned long dst_pos = start_offset + dst_offset;
+		unsigned long src_pos = start_offset + src_offset;
+		size_t dst_in_page = dst_pos & page_mask;
+		size_t src_in_page = src_pos & page_mask;
+		size_t chunk;
+
+		/* largest run that stays inside both the src and dst page */
+		chunk = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+						 src_in_page));
+		chunk = min_t(unsigned long, chunk,
+			      (unsigned long)(PAGE_CACHE_SIZE - dst_in_page));
+
+		copy_pages(extent_buffer_page(dst, dst_pos >> PAGE_CACHE_SHIFT),
+			   extent_buffer_page(dst, src_pos >> PAGE_CACHE_SHIFT),
+			   dst_in_page, src_in_page, chunk);
+
+		len -= chunk;
+		dst_offset += chunk;
+		src_offset += chunk;
+	}
+}
+EXPORT_SYMBOL(memcpy_extent_buffer);
+
+/*
+ * Move 'len' bytes inside one extent buffer from 'src_offset' to
+ * 'dst_offset'.
+ *
+ * Both ranges must lie inside the buffer; if either runs past dst->len a
+ * message is printed and BUG_ON() fires.  When the destination starts
+ * before the source the work is handed to memcpy_extent_buffer(), which
+ * walks forward.  Otherwise the ranges are walked backwards from their
+ * last byte, in chunks that cross neither a source nor a destination page
+ * boundary, and every chunk is handed to move_pages().
+ */
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len)
+{
+ size_t cur;
+ size_t dst_off_in_page;
+ size_t src_off_in_page;
+ /* offsets of the LAST byte of each range; the loop walks downwards */
+ unsigned long dst_end = dst_offset + len - 1;
+ unsigned long src_end = src_offset + len - 1;
+ /* dst->start need not be page aligned: offset of the buffer in page 0 */
+ size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+ unsigned long dst_i;
+ unsigned long src_i;
+
+ if (src_offset + len > dst->len) {
+ printk("memmove bogus src_offset %lu move len %lu len %lu\n",
+ src_offset, len, dst->len);
+ BUG_ON(1);
+ }
+ if (dst_offset + len > dst->len) {
+ printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
+ dst_offset, len, dst->len);
+ BUG_ON(1);
+ }
+ /* destination below source: delegate to the forward-walking copy */
+ if (dst_offset < src_offset) {
+ memcpy_extent_buffer(dst, dst_offset, src_offset, len);
+ return;
+ }
+ while(len > 0) {
+ /* page index and in-page offset of the current last byte */
+ dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
+ src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+
+ dst_off_in_page = (start_offset + dst_end) &
+ ((unsigned long)PAGE_CACHE_SIZE - 1);
+ src_off_in_page = (start_offset + src_end) &
+ ((unsigned long)PAGE_CACHE_SIZE - 1);
+
+ /*
+ * off_in_page + 1 is the number of bytes from the start of the
+ * page up to and including the current last byte, so the chunk
+ * never reaches back into the previous page on either side.
+ */
+ cur = min_t(unsigned long, len, src_off_in_page + 1);
+ cur = min(cur, dst_off_in_page + 1);
+ /* move_pages() takes the first byte of the chunk, not the last */
+ move_pages(extent_buffer_page(dst, dst_i),
+ extent_buffer_page(dst, src_i),
+ dst_off_in_page - cur + 1,
+ src_off_in_page - cur + 1, cur);
+
+ dst_end -= cur;
+ src_end -= cur;
+ len -= cur;
+ }
+}
+EXPORT_SYMBOL(memmove_extent_buffer);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
new file mode 100644
index 000000000000..06be1fe84b29
--- /dev/null
+++ b/fs/btrfs/extent_io.h
@@ -0,0 +1,193 @@
+#ifndef __EXTENTIO__
+#define __EXTENTIO__
+
+#include <linux/rbtree.h>
+
+/* bits for the extent state */
+#define EXTENT_DIRTY 1
+#define EXTENT_WRITEBACK (1 << 1)
+#define EXTENT_UPTODATE (1 << 2)
+#define EXTENT_LOCKED (1 << 3)
+#define EXTENT_NEW (1 << 4)
+#define EXTENT_DELALLOC (1 << 5)
+#define EXTENT_DEFRAG (1 << 6)
+#define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_CSUM (1 << 9)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+/*
+ * page->private values. Every page that is controlled by the extent
+ * map has page->private set to one.
+ */
+#define EXTENT_PAGE_PRIVATE 1
+#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
+
+/*
+ * Table of callbacks that struct extent_io_tree refers to through its
+ * 'ops' pointer.
+ * NOTE(review): when each hook is invoked and how its return value is
+ * used is decided by the callers in extent_io.c, not by this header; the
+ * names suggest hooks around delayed allocation and around page read/write
+ * submission and completion -- confirm against the call sites.
+ */
+struct extent_io_ops {
+ int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
+ int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
+ int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
+ int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
+ void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end);
+};
+
+/*
+ * State tree plus extent buffer LRU, set up by extent_io_tree_init() for
+ * a given address_space.
+ * NOTE(review): the per-field notes below are inferred from the field
+ * names and from the pre-split extent_map_tree code -- confirm against
+ * extent_io.c.
+ */
+struct extent_io_tree {
+ /* rbtree root; presumably holds struct extent_state nodes */
+ struct rb_root state;
+ struct address_space *mapping;
+ /* presumably the byte total of ranges that have EXTENT_DIRTY set */
+ u64 dirty_bytes;
+ rwlock_t lock;
+ /* filesystem callbacks, see struct extent_io_ops */
+ struct extent_io_ops *ops;
+ /* lru_lock presumably guards buffer_lru and lru_size */
+ spinlock_t lru_lock;
+ struct list_head buffer_lru;
+ int lru_size;
+};
+
+/*
+ * One byte range [start, end] together with its state.
+ * NOTE(review): 'state' presumably holds the EXTENT_* bits defined at the
+ * top of this header, and 'rb_node' presumably links the range into
+ * extent_io_tree.state -- confirm against extent_io.c.
+ */
+struct extent_state {
+ u64 start;
+ u64 end; /* inclusive */
+ int in_tree;
+ struct rb_node rb_node;
+ wait_queue_head_t wq;
+ /* reference count */
+ atomic_t refs;
+ unsigned long state;
+
+ /* for use by the FS */
+ u64 private;
+
+ struct list_head list;
+};
+
+/*
+ * A range of 'len' bytes beginning at byte offset 'start', backed by
+ * pages.  The helpers in extent_io.c (read/write/memcmp/memset/memcpy/
+ * memmove_extent_buffer) take offsets relative to 'start' and use the low
+ * bits of 'start' as the offset into the first page, so 'start' does not
+ * have to be page aligned.
+ */
+struct extent_buffer {
+ u64 start;
+ unsigned long len;
+ /*
+ * NOTE(review): map_token, kaddr, map_start and map_len look like a
+ * cached mapping maintained by map_extent_buffer() and
+ * unmap_extent_buffer() -- confirm against extent_io.c.
+ */
+ char *map_token;
+ char *kaddr;
+ unsigned long map_start;
+ unsigned long map_len;
+ struct page *first_page;
+ struct list_head lru;
+ /* reference count, incremented by extent_buffer_get() */
+ atomic_t refs;
+ int flags;
+};
+
+struct extent_map_tree;
+
+typedef struct extent_map *(get_extent_t)(struct inode *inode,
+ struct page *page,
+ size_t page_offset,
+ u64 start, u64 len,
+ int create);
+
+void extent_io_tree_init(struct extent_io_tree *tree,
+ struct address_space *mapping, gfp_t mask);
+void extent_io_tree_empty_lru(struct extent_io_tree *tree);
+int try_release_extent_mapping(struct extent_map_tree *map,
+ struct extent_io_tree *tree, struct page *page);
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
+ get_extent_t *get_extent);
+int __init extent_io_init(void);
+void extent_io_exit(void);
+
+u64 count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end,
+ u64 max_bytes, unsigned long bits);
+
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int filled);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, gfp_t mask);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, gfp_t mask);
+int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits);
+int extent_invalidatepage(struct extent_io_tree *tree,
+ struct page *page, unsigned long offset);
+int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
+ get_extent_t *get_extent,
+ struct writeback_control *wbc);
+int extent_writepages(struct extent_io_tree *tree,
+ struct address_space *mapping,
+ get_extent_t *get_extent,
+ struct writeback_control *wbc);
+int extent_readpages(struct extent_io_tree *tree,
+ struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages,
+ get_extent_t get_extent);
+int extent_prepare_write(struct extent_io_tree *tree,
+ struct inode *inode, struct page *page,
+ unsigned from, unsigned to, get_extent_t *get_extent);
+int extent_commit_write(struct extent_io_tree *tree,
+ struct inode *inode, struct page *page,
+ unsigned from, unsigned to);
+sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
+ get_extent_t *get_extent);
+int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
+void set_page_extent_mapped(struct page *page);
+
+struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+ u64 start, unsigned long len,
+ struct page *page0,
+ gfp_t mask);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start, unsigned long len,
+ gfp_t mask);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_buffer_pages(struct extent_io_tree *tree,
+ struct extent_buffer *eb, u64 start, int wait);
+
+/*
+ * Take an additional reference on 'eb' by atomically incrementing
+ * eb->refs.
+ * NOTE(review): presumably paired with free_extent_buffer() to drop the
+ * reference -- confirm against extent_io.c.
+ */
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+ atomic_inc(&eb->refs);
+}
+
+int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
+ unsigned long start,
+ unsigned long len);
+void read_extent_buffer(struct extent_buffer *eb, void *dst,
+ unsigned long start,
+ unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+ unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len);
+void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+ unsigned long start, unsigned long len);
+int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
+ struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_io_tree *tree,
+ struct extent_buffer *eb);
+int set_extent_buffer_dirty(struct extent_io_tree *tree,
+ struct extent_buffer *eb);
+int set_extent_buffer_uptodate(struct extent_io_tree *tree,
+ struct extent_buffer *eb);
+int extent_buffer_uptodate(struct extent_io_tree *tree,
+ struct extent_buffer *eb);
+int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+ unsigned long min_len, char **token, char **map,
+ unsigned long *map_start,
+ unsigned long *map_len, int km);
+int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
+ unsigned long min_len, char **token, char **map,
+ unsigned long *map_start,
+ unsigned long *map_len, int km);
+void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
+#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 010a287fbd71..268ad8facf6e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1,17 +1,10 @@
-#include <linux/bitops.h>
-#include <linux/slab.h>
-#include <linux/bio.h>
-#include <linux/mm.h>
+#include <linux/err.h>
#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/page-flags.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/spinlock.h>
-#include <linux/blkdev.h>
-#include <linux/swap.h>
#include <linux/version.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
+#include <linux/hardirq.h>
#include "extent_map.h"
/* temporary define until extent_map moves out of btrfs */
@@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
unsigned long));
static struct kmem_cache *extent_map_cache;
-static struct kmem_cache *extent_state_cache;
-static struct kmem_cache *extent_buffer_cache;
-
-static LIST_HEAD(buffers);
-static LIST_HEAD(states);
-
-static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-#define BUFFER_LRU_MAX 64
-
-struct tree_entry {
- u64 start;
- u64 end;
- int in_tree;
- struct rb_node rb_node;
-};
-
-struct extent_page_data {
- struct bio *bio;
- struct extent_map_tree *tree;
- get_extent_t *get_extent;
-};
int __init extent_map_init(void)
{
@@ -50,72 +22,23 @@ int __init extent_map_init(void)
NULL);
if (!extent_map_cache)
return -ENOMEM;
- extent_state_cache = btrfs_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- NULL);
- if (!extent_state_cache)
- goto free_map_cache;
- extent_buffer_cache = btrfs_cache_create("extent_buffers",
- sizeof(struct extent_buffer), 0,
- NULL);
- if (!extent_buffer_cache)
- goto free_state_cache;
return 0;
-
-free_state_cache:
- kmem_cache_destroy(extent_state_cache);
-free_map_cache:
- kmem_cache_destroy(extent_map_cache);
- return -ENOMEM;
}
void extent_map_exit(void)
{
- struct extent_state *state;
-
- while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, list);
- printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
- list_del(&state->list);
- kmem_cache_free(extent_state_cache, state);
-
- }
-
if (extent_map_cache)
kmem_cache_destroy(extent_map_cache);
- if (extent_state_cache)
- kmem_cache_destroy(extent_state_cache);
- if (extent_buffer_cache)
- kmem_cache_destroy(extent_buffer_cache);
}
-void extent_map_tree_init(struct extent_map_tree *tree,
- struct address_space *mapping, gfp_t mask)
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
{
tree->map.rb_node = NULL;
- tree->state.rb_node = NULL;
- tree->ops = NULL;
- tree->dirty_bytes = 0;
- rwlock_init(&tree->lock);
- spin_lock_init(&tree->lru_lock);
- tree->mapping = mapping;
- INIT_LIST_HEAD(&tree->buffer_lru);
- tree->lru_size = 0;
+ tree->last = NULL;
+ spin_lock_init(&tree->lock);
}
EXPORT_SYMBOL(extent_map_tree_init);
-void extent_map_tree_empty_lru(struct extent_map_tree *tree)
-{
- struct extent_buffer *eb;
- while(!list_empty(&tree->buffer_lru)) {
- eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
- lru);
- list_del_init(&eb->lru);
- free_extent_buffer(eb);
- }
-}
-EXPORT_SYMBOL(extent_map_tree_empty_lru);
-
struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
@@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
if (!em || IS_ERR(em))
return em;
em->in_tree = 0;
+ em->flags = 0;
atomic_set(&em->refs, 1);
return em;
}
@@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em)
{
if (!em)
return;
+ WARN_ON(atomic_read(&em->refs) == 0);
if (atomic_dec_and_test(&em->refs)) {
WARN_ON(em->in_tree);
kmem_cache_free(extent_map_cache, em);
@@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em)
}
EXPORT_SYMBOL(free_extent_map);
-
-struct extent_state *alloc_extent_state(gfp_t mask)
-{
- struct extent_state *state;
- unsigned long flags;
-
- state = kmem_cache_alloc(extent_state_cache, mask);
- if (!state || IS_ERR(state))
- return state;
- state->state = 0;
- state->in_tree = 0;
- state->private = 0;
-
- spin_lock_irqsave(&state_lock, flags);
- list_add(&state->list, &states);
- spin_unlock_irqrestore(&state_lock, flags);
-
- atomic_set(&state->refs, 1);
- init_waitqueue_head(&state->wq);
- return state;
-}
-EXPORT_SYMBOL(alloc_extent_state);
-
-void free_extent_state(struct extent_state *state)
-{
- unsigned long flags;
- if (!state)
- return;
- if (atomic_dec_and_test(&state->refs)) {
- WARN_ON(state->in_tree);
- spin_lock_irqsave(&state_lock, flags);
- list_del(&state->list);
- spin_unlock_irqrestore(&state_lock, flags);
- kmem_cache_free(extent_state_cache, state);
- }
-}
-EXPORT_SYMBOL(free_extent_state);
-
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
struct rb_node *node)
{
struct rb_node ** p = &root->rb_node;
struct rb_node * parent = NULL;
- struct tree_entry *entry;
+ struct extent_map *entry;
while(*p) {
parent = *p;
- entry = rb_entry(parent, struct tree_entry, rb_node);
+ entry = rb_entry(parent, struct extent_map, rb_node);
+
+ WARN_ON(!entry->in_tree);
if (offset < entry->start)
p = &(*p)->rb_left;
- else if (offset > entry->end)
+ else if (offset >= extent_map_end(entry))
p = &(*p)->rb_right;
else
return parent;
}
- entry = rb_entry(node, struct tree_entry, rb_node);
+ entry = rb_entry(node, struct extent_map, rb_node);
entry->in_tree = 1;
rb_link_node(node, parent, p);
rb_insert_color(node, root);
@@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
struct rb_node * n = root->rb_node;
struct rb_node *prev = NULL;
struct rb_node *orig_prev = NULL;
- struct tree_entry *entry;
- struct tree_entry *prev_entry = NULL;
+ struct extent_map *entry;
+ struct extent_map *prev_entry = NULL;
while(n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
+ entry = rb_entry(n, struct extent_map, rb_node);
prev = n;
prev_entry = entry;
+ WARN_ON(!entry->in_tree);
+
if (offset < entry->start)
n = n->rb_left;
- else if (offset > entry->end)
+ else if (offset >= extent_map_end(entry))
n = n->rb_right;
else
return n;
@@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
if (prev_ret) {
orig_prev = prev;
- while(prev && offset > prev_entry->end) {
+ while(prev && offset >= extent_map_end(prev_entry)) {
prev = rb_next(prev);
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*prev_ret = prev;
prev = orig_prev;
}
if (next_ret) {
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ prev_entry = rb_entry(prev, struct extent_map, rb_node);
while(prev && offset < prev_entry->start) {
prev = rb_prev(prev);
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*next_ret = prev;
}
@@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
return ret;
}
-static int tree_delete(struct rb_root *root, u64 offset)
+static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
- struct rb_node *node;
- struct tree_entry *entry;
-
- node = __tree_search(root, offset, NULL, NULL);
- if (!node)
- return -ENOENT;
- entry = rb_entry(node, struct tree_entry, rb_node);
- entry->in_tree = 0;
- rb_erase(node, root);
+ if (extent_map_end(prev) == next->start &&
+ prev->flags == next->flags &&
+ prev->bdev == next->bdev &&
+ ((next->block_start == EXTENT_MAP_HOLE &&
+ prev->block_start == EXTENT_MAP_HOLE) ||
+ (next->block_start == EXTENT_MAP_INLINE &&
+ prev->block_start == EXTENT_MAP_INLINE) ||
+ (next->block_start == EXTENT_MAP_DELALLOC &&
+ prev->block_start == EXTENT_MAP_DELALLOC) ||
+ (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
+ next->block_start == extent_map_block_end(prev)))) {
+ return 1;
+ }
return 0;
}
/*
- * add_extent_mapping tries a simple backward merge with existing
+ * add_extent_mapping tries a simple forward/backward merge with existing
* mappings. The extent_map struct passed in will be inserted into
* the tree directly (no copies made, just a reference taken).
*/
@@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em)
{
int ret = 0;
- struct extent_map *prev = NULL;
+ struct extent_map *merge = NULL;
struct rb_node *rb;
- write_lock_irq(&tree->lock);
- rb = tree_insert(&tree->map, em->end, &em->rb_node);
+ rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
- prev = rb_entry(rb, struct extent_map, rb_node);
+ merge = rb_entry(rb, struct extent_map, rb_node);
ret = -EEXIST;
goto out;
}
@@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree,
if (em->start != 0) {
rb = rb_prev(&em->rb_node);
if (rb)
- prev = rb_entry(rb, struct extent_map, rb_node);
- if (prev && prev->end + 1 == em->start &&
- ((em->block_start == EXTENT_MAP_HOLE &&
- prev->block_start == EXTENT_MAP_HOLE) ||
- (em->block_start == EXTENT_MAP_INLINE &&
- prev->block_start == EXTENT_MAP_INLINE) ||
- (em->block_start == EXTENT_MAP_DELALLOC &&
- prev->block_start == EXTENT_MAP_DELALLOC) ||
- (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
- em->block_start == prev->block_end + 1))) {
- em->start = prev->start;
- em->block_start = prev->block_start;
- rb_erase(&prev->rb_node, &tree->map);
- prev->in_tree = 0;
- free_extent_map(prev);
+ merge = rb_entry(rb, struct extent_map, rb_node);
+ if (rb && mergable_maps(merge, em)) {
+ em->start = merge->start;
+ em->len += merge->len;
+ em->block_start = merge->block_start;
+ merge->in_tree = 0;
+ rb_erase(&merge->rb_node, &tree->map);
+ free_extent_map(merge);
}
}
+ rb = rb_next(&em->rb_node);
+ if (rb)
+ merge = rb_entry(rb, struct extent_map, rb_node);
+ if (rb && mergable_maps(em, merge)) {
+ em->len += merge->len;
+ rb_erase(&merge->rb_node, &tree->map);
+ merge->in_tree = 0;
+ free_extent_map(merge);
+ }
+ tree->last = em;
out:
- write_unlock_irq(&tree->lock);
return ret;
}
EXPORT_SYMBOL(add_extent_mapping);
+/*
+ * Return start + len, the exclusive end of the range, or the largest u64
+ * value when that sum would wrap around.
+ */
+static u64 range_end(u64 start, u64 len)
+{
+	u64 end = start + len;
+
+	return end < start ? (u64)-1 : end;
+}
+
/*
* lookup_extent_mapping returns the first extent_map struct in the
- * tree that intersects the [start, end] (inclusive) range. There may
+ * tree that intersects the [start, len] range. There may
* be additional objects in the tree that intersect, so check the object
* returned carefully to make sure you don't need additional lookups.
*/
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 end)
+ u64 start, u64 len)
{
struct extent_map *em;
struct rb_node *rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *next = NULL;
+ struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found;
- read_lock_irq(&tree->lock);
rb_node = __tree_search(&tree->map, start, &prev, &next);
if (!rb_node && prev) {
em = rb_entry(prev, struct extent_map, rb_node);
- if (em->start <= end && em->end >= start)
+ if (end > em->start && start < extent_map_end(em))
goto found;
}
if (!rb_node && next) {
em = rb_entry(next, struct extent_map, rb_node);
- if (em->start <= end && em->end >= start)
+ if (end > em->start && start < extent_map_end(em))
goto found;
}
if (!rb_node) {
@@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
goto out;
}
em = rb_entry(rb_node, struct extent_map, rb_node);
- if (em->end < start || em->start > end) {
- em = NULL;
- goto out;
- }
+ if (end > em->start && start < extent_map_end(em))
+ goto found;
+
+ em = NULL;
+ goto out;
+
found:
atomic_inc(&em->refs);
+ tree->last = em;
out:
- read_unlock_irq(&tree->lock);
return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);
@@ -370,2866 +273,12 @@ EXPORT_SYMBOL(lookup_extent_mapping);
*/
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
- int ret;
-
- write_lock_irq(&tree->lock);
- ret = tree_delete(&tree->map, em->end);
- write_unlock_irq(&tree->lock);
- return ret;
-}
-EXPORT_SYMBOL(remove_extent_mapping);
-
-/*
- * utility function to look for merge candidates inside a given range.
- * Any extents with matching state are merged together into a single
- * extent in the tree. Extents with EXTENT_IO in their state field
- * are not merged because the end_io handlers need to be able to do
- * operations on them without sleeping (or doing allocations/splits).
- *
- * This should be called with the tree lock held.
- */
-static int merge_state(struct extent_map_tree *tree,
- struct extent_state *state)
-{
- struct extent_state *other;
- struct rb_node *other_node;
-
- if (state->state & EXTENT_IOBITS)
- return 0;
-
- other_node = rb_prev(&state->rb_node);
- if (other_node) {
- other = rb_entry(other_node, struct extent_state, rb_node);
- if (other->end == state->start - 1 &&
- other->state == state->state) {
- state->start = other->start;
- other->in_tree = 0;
- rb_erase(&other->rb_node, &tree->state);
- free_extent_state(other);
- }
- }
- other_node = rb_next(&state->rb_node);
- if (other_node) {
- other = rb_entry(other_node, struct extent_state, rb_node);
- if (other->start == state->end + 1 &&
- other->state == state->state) {
- other->start = state->start;
- state->in_tree = 0;
- rb_erase(&state->rb_node, &tree->state);
- free_extent_state(state);
- }
- }
- return 0;
-}
-
-/*
- * insert an extent_state struct into the tree. 'bits' are set on the
- * struct before it is inserted.
- *
- * This may return -EEXIST if the extent is already there, in which case the
- * state struct is freed.
- *
- * The tree lock is not taken internally. This is a utility function and
- * probably isn't what you want to call (see set/clear_extent_bit).
- */
-static int insert_state(struct extent_map_tree *tree,
- struct extent_state *state, u64 start, u64 end,
- int bits)
-{
- struct rb_node *node;
-
- if (end < start) {
- printk("end < start %Lu %Lu\n", end, start);
- WARN_ON(1);
- }
- if (bits & EXTENT_DIRTY)
- tree->dirty_bytes += end - start + 1;
- state->state |= bits;
- state->start = start;
- state->end = end;
- node = tree_insert(&tree->state, end, &state->rb_node);
- if (node) {
- struct extent_state *found;
- found = rb_entry(node, struct extent_state, rb_node);
- printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
- free_extent_state(state);
- return -EEXIST;
- }
- merge_state(tree, state);
- return 0;
-}
-
-/*
- * split a given extent state struct in two, inserting the preallocated
- * struct 'prealloc' as the newly created second half. 'split' indicates an
- * offset inside 'orig' where it should be split.
- *
- * Before calling,
- * the tree has 'orig' at [orig->start, orig->end]. After calling, there
- * are two extent state structs in the tree:
- * prealloc: [orig->start, split - 1]
- * orig: [ split, orig->end ]
- *
- * The tree locks are not taken by this function. They need to be held
- * by the caller.
- */
-static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
- struct extent_state *prealloc, u64 split)
-{
- struct rb_node *node;
- prealloc->start = orig->start;
- prealloc->end = split - 1;
- prealloc->state = orig->state;
- orig->start = split;
-
- node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
- if (node) {
- struct extent_state *found;
- found = rb_entry(node, struct extent_state, rb_node);
- printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
- free_extent_state(prealloc);
- return -EEXIST;
- }
- return 0;
-}
-
-/*
- * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
- *
- * If no bits are set on the state struct after clearing things, the
- * struct is freed and removed from the tree
- */
-static int clear_state_bit(struct extent_map_tree *tree,
- struct extent_state *state, int bits, int wake,
- int delete)
-{
- int ret = state->state & bits;
-
- if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
- u64 range = state->end - state->start + 1;
- WARN_ON(range > tree->dirty_bytes);
- tree->dirty_bytes -= range;
- }
- state->state &= ~bits;
- if (wake)
- wake_up(&state->wq);
- if (delete || state->state == 0) {
- if (state->in_tree) {
- rb_erase(&state->rb_node, &tree->state);
- state->in_tree = 0;
- free_extent_state(state);
- } else {
- WARN_ON(1);
- }
- } else {
- merge_state(tree, state);
- }
- return ret;
-}
-
-/*
- * clear some bits on a range in the tree. This may require splitting
- * or inserting elements in the tree, so the gfp mask is used to
- * indicate which allocations or sleeping are allowed.
- *
- * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
- * the given range from the tree regardless of state (ie for truncate).
- *
- * the range [start, end] is inclusive.
- *
- * This takes the tree lock, and returns < 0 on error, > 0 if any of the
- * bits were already set, or zero if none of the bits were already set.
- */
-int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, gfp_t mask)
-{
- struct extent_state *state;
- struct extent_state *prealloc = NULL;
- struct rb_node *node;
- unsigned long flags;
- int err;
- int set = 0;
-
-again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
-
- write_lock_irqsave(&tree->lock, flags);
- /*
- * this search will find the extents that end after
- * our range starts
- */
- node = tree_search(&tree->state, start);
- if (!node)
- goto out;
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start > end)
- goto out;
- WARN_ON(state->end < start);
-
- /*
- * | ---- desired range ---- |
- * | state | or
- * | ------------- state -------------- |
- *
- * We need to split the extent we found, and may flip
- * bits on second half.
- *
- * If the extent we found extends past our range, we
- * just split and search again. It'll get split again
- * the next time though.
- *
- * If the extent we found is inside our range, we clear
- * the desired bit on it.
- */
-
- if (state->start < start) {
- err = split_state(tree, state, prealloc, start);
- BUG_ON(err == -EEXIST);
- prealloc = NULL;
- if (err)
- goto out;
- if (state->end <= end) {
- start = state->end + 1;
- set |= clear_state_bit(tree, state, bits,
- wake, delete);
- } else {
- start = state->start;
- }
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state |
- * We need to split the extent, and clear the bit
- * on the first half
- */
- if (state->start <= end && state->end > end) {
- err = split_state(tree, state, prealloc, end + 1);
- BUG_ON(err == -EEXIST);
-
- if (wake)
- wake_up(&state->wq);
- set |= clear_state_bit(tree, prealloc, bits,
- wake, delete);
- prealloc = NULL;
- goto out;
- }
-
- start = state->end + 1;
- set |= clear_state_bit(tree, state, bits, wake, delete);
- goto search_again;
-
-out:
- write_unlock_irqrestore(&tree->lock, flags);
- if (prealloc)
- free_extent_state(prealloc);
-
- return set;
-
-search_again:
- if (start > end)
- goto out;
- write_unlock_irqrestore(&tree->lock, flags);
- if (mask & __GFP_WAIT)
- cond_resched();
- goto again;
-}
-EXPORT_SYMBOL(clear_extent_bit);
-
-/*
- * Sleep until state->wq is woken.
- *
- * Entered with tree->lock read-held; the lock is released around the
- * call to schedule() and re-acquired before returning.  Always returns 0.
- */
-static int wait_on_state(struct extent_map_tree *tree,
-			 struct extent_state *state)
-{
-	DEFINE_WAIT(sleeper);
-
-	/* get on the wait queue before the lock is dropped */
-	prepare_to_wait(&state->wq, &sleeper, TASK_UNINTERRUPTIBLE);
-	read_unlock_irq(&tree->lock);
-
-	schedule();
-
-	read_lock_irq(&tree->lock);
-	finish_wait(&state->wq, &sleeper);
-	return 0;
-}
-
-/*
- * Wait until none of 'bits' is set on any state overlapping the
- * inclusive range [start, end].
- *
- * The tree read lock is taken here.  Whenever a state with one of the
- * bits is found, a reference is taken on it, we sleep on its wait queue
- * and then search again from that state's start.  Always returns 0.
- */
-int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
-{
-	struct rb_node *cursor;
-	struct extent_state *es;
-
-	read_lock_irq(&tree->lock);
-	for (;;) {
-		/*
-		 * this search will find all the extents that end after
-		 * our range starts
-		 */
-		cursor = tree_search(&tree->state, start);
-		if (!cursor)
-			break;
-
-		es = rb_entry(cursor, struct extent_state, rb_node);
-		if (es->start > end)
-			break;
-
-		if (es->state & bits) {
-			/* hold a ref across the sleep, then look again */
-			start = es->start;
-			atomic_inc(&es->refs);
-			wait_on_state(tree, es);
-			free_extent_state(es);
-			continue;
-		}
-
-		start = es->end + 1;
-		if (start > end)
-			break;
-
-		if (need_resched()) {
-			read_unlock_irq(&tree->lock);
-			cond_resched();
-			read_lock_irq(&tree->lock);
-		}
-	}
-	read_unlock_irq(&tree->lock);
-	return 0;
-}
-EXPORT_SYMBOL(wait_extent_bit);
-
-/*
- * OR 'bits' into state->state.  When EXTENT_DIRTY is being turned on for
- * a state that did not have it yet, the state's length is added to
- * tree->dirty_bytes.
- */
-static void set_state_bits(struct extent_map_tree *tree,
-			   struct extent_state *state,
-			   int bits)
-{
-	int wants_dirty = bits & EXTENT_DIRTY;
-	int already_dirty = state->state & EXTENT_DIRTY;
-
-	if (wants_dirty && !already_dirty) {
-		u64 span = state->end - state->start + 1;
-
-		tree->dirty_bytes += span;
-	}
-	state->state |= bits;
-}
-
-/*
- * set some bits on a range in the tree. This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
- *
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set. The start of the existing
- * range is returned in failed_start in this case.
- *
- * [start, end] is inclusive
- * This takes the tree lock.
- *
- * Each pass under the lock deals with the state returned by
- * tree_search() (or inserts a new state when there is a hole), moves
- * 'start' forward, then drops the lock and retries from 'again' until
- * start > end.
- *
- * A spare extent_state is preallocated only when 'mask' contains
- * __GFP_WAIT; -ENOMEM is returned if that allocation fails.
- * NOTE(review): without __GFP_WAIT 'prealloc' stays NULL and is still
- * passed to insert_state()/split_state() -- confirm those helpers cope.
- *
- * 'failed_start' is only written on the -EEXIST paths.
- *
- * Returns 0 on success, -EEXIST as described above, or the error
- * reported by insert_state()/split_state().
- */
-int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
- int exclusive, u64 *failed_start, gfp_t mask)
-{
- struct extent_state *state;
- struct extent_state *prealloc = NULL;
- struct rb_node *node;
- unsigned long flags;
- int err = 0;
- int set;
- u64 last_start;
- u64 last_end;
-again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
-
- write_lock_irqsave(&tree->lock, flags);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(&tree->state, start);
- if (!node) {
- err = insert_state(tree, prealloc, start, end, bits);
- prealloc = NULL; /* cleared so the out: path does not free it */
- BUG_ON(err == -EEXIST);
- goto out;
- }
-
- state = rb_entry(node, struct extent_state, rb_node);
- last_start = state->start;
- last_end = state->end; /* assigned but not read again in this function */
-
- /*
- * | ---- desired range ---- |
- * | state |
- *
- * Just lock what we found and keep going
- */
- if (state->start == start && state->end <= end) {
- set = state->state & bits;
- if (set && exclusive) {
- *failed_start = state->start;
- err = -EEXIST;
- goto out;
- }
- set_state_bits(tree, state, bits);
- start = state->end + 1;
- merge_state(tree, state);
- goto search_again;
- }
-
- /*
- * | ---- desired range ---- |
- * | state |
- * or
- * | ------------- state -------------- |
- *
- * We need to split the extent we found, and may flip bits on
- * second half.
- *
- * If the extent we found extends past our
- * range, we just split and search again. It'll get split
- * again the next time though.
- *
- * If the extent we found is inside our range, we set the
- * desired bit on it.
- */
- if (state->start < start) {
- set = state->state & bits;
- if (exclusive && set) {
- *failed_start = start;
- err = -EEXIST;
- goto out;
- }
- err = split_state(tree, state, prealloc, start);
- BUG_ON(err == -EEXIST);
- prealloc = NULL; /* cleared so the out: path does not free it */
- if (err)
- goto out;
- if (state->end <= end) {
- set_state_bits(tree, state, bits);
- start = state->end + 1;
- merge_state(tree, state);
- } else {
- start = state->start;
- }
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state | or | state |
- *
- * There's a hole, we need to insert something in it and
- * ignore the extent we found.
- */
- if (state->start > start) {
- u64 this_end;
- if (end < last_start)
- this_end = end;
- else
- this_end = last_start -1;
- err = insert_state(tree, prealloc, start, this_end,
- bits);
- prealloc = NULL; /* cleared so the out: path does not free it */
- BUG_ON(err == -EEXIST);
- if (err)
- goto out;
- start = this_end + 1;
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state |
- * We need to split the extent, and set the bit
- * on the first half
- */
- if (state->start <= end && state->end > end) {
- set = state->state & bits;
- if (exclusive && set) {
- *failed_start = start;
- err = -EEXIST;
- goto out;
- }
- err = split_state(tree, state, prealloc, end + 1);
- BUG_ON(err == -EEXIST); /* other split_state() errors are not checked before prealloc is used below */
-
- set_state_bits(tree, prealloc, bits);
- merge_state(tree, prealloc);
- prealloc = NULL;
- goto out;
- }
-
- goto search_again;
-
-out:
- write_unlock_irqrestore(&tree->lock, flags);
- if (prealloc)
- free_extent_state(prealloc);
-
- return err;
-
-search_again:
- if (start > end)
- goto out;
- write_unlock_irqrestore(&tree->lock, flags);
- if (mask & __GFP_WAIT)
- cond_resched();
- goto again;
-}
-EXPORT_SYMBOL(set_extent_bit);
-
-/*
- * wrappers around set/clear extent bit
- *
- * set_extent_dirty: set EXTENT_DIRTY on the inclusive range [start, end],
- * non-exclusively.  Returns whatever set_extent_bit() returns.
- */
-int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-		     gfp_t mask)
-{
-	const int bits = EXTENT_DIRTY;
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_dirty);
-
-/*
- * Set the caller-supplied 'bits' on [start, end], non-exclusively.
- * Returns whatever set_extent_bit() returns.
- */
-int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-		    int bits, gfp_t mask)
-{
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_bits);
-
-/*
- * Clear the caller-supplied 'bits' on [start, end]; both flag arguments
- * of clear_extent_bit() are passed as 0.  Returns its result.
- */
-int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
-		      int bits, gfp_t mask)
-{
-	int ret;
-
-	ret = clear_extent_bit(tree, start, end, bits, 0, 0, mask);
-	return ret;
-}
-EXPORT_SYMBOL(clear_extent_bits);
-
-/*
- * Set both EXTENT_DELALLOC and EXTENT_DIRTY on [start, end],
- * non-exclusively.  Returns whatever set_extent_bit() returns.
- */
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
-			gfp_t mask)
-{
-	const int bits = EXTENT_DELALLOC | EXTENT_DIRTY;
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_delalloc);
-
-/*
- * Clear EXTENT_DIRTY and EXTENT_DELALLOC on [start, end]; both flag
- * arguments of clear_extent_bit() are passed as 0.  Returns its result.
- */
-int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
-		       gfp_t mask)
-{
-	const int bits = EXTENT_DIRTY | EXTENT_DELALLOC;
-
-	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_dirty);
-
-/*
- * Set EXTENT_NEW on [start, end], non-exclusively.
- * Returns whatever set_extent_bit() returns.
- */
-int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
-		   gfp_t mask)
-{
-	const int bits = EXTENT_NEW;
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_new);
-
-/*
- * Clear EXTENT_NEW on [start, end]; both flag arguments of
- * clear_extent_bit() are passed as 0.  Returns its result.
- */
-int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
-		     gfp_t mask)
-{
-	const int bits = EXTENT_NEW;
-
-	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_new);
-
-/*
- * Set EXTENT_UPTODATE on [start, end], non-exclusively.
- * Returns whatever set_extent_bit() returns.
- */
-int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
-			gfp_t mask)
-{
-	const int bits = EXTENT_UPTODATE;
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_uptodate);
-
-/*
- * Clear EXTENT_UPTODATE on [start, end]; both flag arguments of
- * clear_extent_bit() are passed as 0.  Returns its result.
- */
-int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
-			  gfp_t mask)
-{
-	const int bits = EXTENT_UPTODATE;
-
-	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_uptodate);
-
-/*
- * Set EXTENT_WRITEBACK on [start, end], non-exclusively.
- * Returns whatever set_extent_bit() returns.
- */
-int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
-			 gfp_t mask)
-{
-	const int bits = EXTENT_WRITEBACK;
-	const int exclusive = 0;
-
-	return set_extent_bit(tree, start, end, bits, exclusive, NULL, mask);
-}
-EXPORT_SYMBOL(set_extent_writeback);
-
-/*
- * Clear EXTENT_WRITEBACK on [start, end].  Unlike most of the clear
- * wrappers here, the first flag argument of clear_extent_bit() is passed
- * as 1 and the second as 0.  Returns its result.
- */
-int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
-			   gfp_t mask)
-{
-	const int bits = EXTENT_WRITEBACK;
-
-	return clear_extent_bit(tree, start, end, bits, 1, 0, mask);
-}
-EXPORT_SYMBOL(clear_extent_writeback);
-
-/*
- * Wait for EXTENT_WRITEBACK to clear on the inclusive range [start, end].
- * Returns the result of wait_extent_bit().
- */
-int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
-{
-	const int bits = EXTENT_WRITEBACK;
-
-	return wait_extent_bit(tree, start, end, bits);
-}
-EXPORT_SYMBOL(wait_on_extent_writeback);
-
-/*
- * Lock [start, end] (inclusive) by setting EXTENT_LOCKED exclusively.
- *
- * The range is taken in ascending order.  When part of it is already
- * locked and 'mask' allows waiting (__GFP_WAIT), we wait for that part
- * to be unlocked and retry from where the conflict was reported.
- * Otherwise the result of set_extent_bit() is returned as is.
- */
-int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
-{
-	u64 conflict;
-	int ret;
-
-	for (;;) {
-		ret = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-				     &conflict, mask);
-		if (ret != -EEXIST || !(mask & __GFP_WAIT))
-			break;
-
-		wait_extent_bit(tree, conflict, end, EXTENT_LOCKED);
-		start = conflict;
-		WARN_ON(start > end);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(lock_extent);
-
-/*
- * Clear EXTENT_LOCKED on [start, end]; the first flag argument of
- * clear_extent_bit() is passed as 1 and the second as 0.
- * Returns its result.
- */
-int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
-		  gfp_t mask)
-{
-	const int bits = EXTENT_LOCKED;
-
-	return clear_extent_bit(tree, start, end, bits, 1, 0, mask);
-}
-EXPORT_SYMBOL(unlock_extent);
-
-/*
- * Mark every page cache page covering [start, end] dirty, then mark the
- * same byte range dirty in the extent tree.
- *
- * Each page must already be present in tree->mapping (BUG otherwise).
- * Always returns 0.
- */
-int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
-{
-	unsigned long last = end >> PAGE_CACHE_SHIFT;
-	unsigned long idx;
-
-	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
-		struct page *pg = find_get_page(tree->mapping, idx);
-
-		BUG_ON(!pg);
-		__set_page_dirty_nobuffers(pg);
-		page_cache_release(pg);
-	}
-
-	set_extent_dirty(tree, start, end, GFP_NOFS);
-	return 0;
-}
-EXPORT_SYMBOL(set_range_dirty);
-
-/*
- * Put every page cache page covering [start, end] under writeback, then
- * set EXTENT_WRITEBACK on the same byte range in the extent tree.
- *
- * Each page must already be present in tree->mapping (BUG otherwise).
- * Always returns 0.
- */
-int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
-{
-	unsigned long last = end >> PAGE_CACHE_SHIFT;
-	unsigned long idx;
-
-	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
-		struct page *pg = find_get_page(tree->mapping, idx);
-
-		BUG_ON(!pg);
-		set_page_writeback(pg);
-		page_cache_release(pg);
-	}
-
-	set_extent_writeback(tree, start, end, GFP_NOFS);
-	return 0;
-}
-EXPORT_SYMBOL(set_range_writeback);
-
-/*
- * Find the first state, in tree order starting from the one returned by
- * tree_search() for 'start', that ends at or after 'start' and has any
- * of 'bits' set.
- *
- * On a hit the state's bounds are stored in *start_ret / *end_ret and 0
- * is returned; 1 is returned when nothing matches.  The tree read lock
- * is taken here.
- */
-int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits)
-{
-	struct extent_state *es;
-	struct rb_node *cursor;
-	int ret = 1;
-
-	read_lock_irq(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	cursor = tree_search(&tree->state, start);
-	if (cursor && !IS_ERR(cursor)) {
-		for (; cursor; cursor = rb_next(cursor)) {
-			es = rb_entry(cursor, struct extent_state, rb_node);
-			if (es->end < start || !(es->state & bits))
-				continue;
-
-			*start_ret = es->start;
-			*end_ret = es->end;
-			ret = 0;
-			break;
-		}
-	}
-	read_unlock_irq(&tree->lock);
-	return ret;
-}
-EXPORT_SYMBOL(find_first_extent_bit);
-
-/*
- * Find a run of EXTENT_DELALLOC states around *start and set
- * EXTENT_LOCKED on each of them.
- *
- * On return *start / *end describe the locked run when the return value
- * (the number of states locked) is non-zero.  When nothing was locked,
- * *end is the end of the non-delalloc state that was found, or (u64)-1
- * when the search found no state at all.
- *
- * The run stops at the first state that is not delalloc, at a state
- * that does not begin right after the previous one, at the end of the
- * tree, or once at least max_bytes have been accumulated.  If a state
- * in the run is already locked we sleep on it and search again.
- * The tree write lock is taken here.
- */
-u64 find_lock_delalloc_range(struct extent_map_tree *tree,
-			     u64 *start, u64 *end, u64 max_bytes)
-{
-	struct extent_state *es;
-	struct rb_node *cursor;
-	u64 next_start = *start;
-	u64 nr_locked = 0;
-	u64 bytes = 0;
-
-	write_lock_irq(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-rescan:
-	cursor = tree_search(&tree->state, next_start);
-	if (!cursor || IS_ERR(cursor)) {
-		*end = (u64)-1;
-		goto unlock;
-	}
-
-	for (;;) {
-		es = rb_entry(cursor, struct extent_state, rb_node);
-
-		/* after the first lock, only extend across adjacent states */
-		if (nr_locked && es->start != next_start)
-			break;
-
-		if (!(es->state & EXTENT_DELALLOC)) {
-			if (!nr_locked)
-				*end = es->end;
-			break;
-		}
-
-		if (!nr_locked) {
-			/*
-			 * step back over the preceding tree entries that
-			 * are delalloc too, and start from the earliest
-			 */
-			struct rb_node *back = rb_prev(cursor);
-
-			while (back) {
-				struct extent_state *before;
-
-				before = rb_entry(back, struct extent_state,
-						  rb_node);
-				if (!(before->state & EXTENT_DELALLOC))
-					break;
-				es = before;
-				cursor = back;
-				back = rb_prev(back);
-			}
-		}
-
-		if (es->state & EXTENT_LOCKED) {
-			DEFINE_WAIT(sleeper);
-
-			atomic_inc(&es->refs);
-			prepare_to_wait(&es->wq, &sleeper,
-					TASK_UNINTERRUPTIBLE);
-			write_unlock_irq(&tree->lock);
-			schedule();
-			write_lock_irq(&tree->lock);
-			finish_wait(&es->wq, &sleeper);
-			free_extent_state(es);
-			goto rescan;
-		}
-
-		es->state |= EXTENT_LOCKED;
-		if (!nr_locked)
-			*start = es->start;
-		nr_locked++;
-		*end = es->end;
-		next_start = es->end + 1;
-
-		cursor = rb_next(cursor);
-		if (!cursor)
-			break;
-
-		bytes += es->end - es->start + 1;
-		if (bytes >= max_bytes)
-			break;
-	}
-unlock:
-	write_unlock_irq(&tree->lock);
-	return nr_locked;
-}
-
-/*
- * Count the bytes between *start and search_end that are covered by
- * states having any of 'bits' set, stopping once max_bytes is reached.
- *
- * *start is updated to the start of the first matching state, unless
- * that first match already reaches max_bytes.  As a shortcut, a query
- * for exactly EXTENT_DIRTY starting at offset 0 returns
- * tree->dirty_bytes without walking the tree.
- *
- * Returns 0 (after a warning) when search_end <= *start.
- * The tree write lock is taken here.
- */
-u64 count_range_bits(struct extent_map_tree *tree,
-		     u64 *start, u64 search_end, u64 max_bytes,
-		     unsigned long bits)
-{
-	struct extent_state *es;
-	struct rb_node *cursor;
-	u64 from = *start;
-	u64 total = 0;
-	int seen_first = 0;
-
-	if (search_end <= from) {
-		printk("search_end %Lu start %Lu\n", search_end, from);
-		WARN_ON(1);
-		return 0;
-	}
-
-	write_lock_irq(&tree->lock);
-	if (from == 0 && bits == EXTENT_DIRTY) {
-		total = tree->dirty_bytes;
-		goto unlock;
-	}
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	cursor = tree_search(&tree->state, from);
-	if (!cursor || IS_ERR(cursor))
-		goto unlock;
-
-	for (; cursor; cursor = rb_next(cursor)) {
-		es = rb_entry(cursor, struct extent_state, rb_node);
-		if (es->start > search_end)
-			break;
-		if (es->end < from || !(es->state & bits))
-			continue;
-
-		/* only the part of the state inside the window counts */
-		total += min(search_end, es->end) + 1 -
-			 max(from, es->start);
-		if (total >= max_bytes)
-			break;
-		if (!seen_first) {
-			*start = es->start;
-			seen_first = 1;
-		}
-	}
-unlock:
-	write_unlock_irq(&tree->lock);
-	return total;
-}
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- *
- * Every page covering [start, end] is grabbed (created if needed, locked
- * and referenced) with grab_cache_page(), then the byte range is locked
- * in the extent tree.
- *
- * Returns 0 on success.  If a page cannot be grabbed, every page grabbed
- * so far is unlocked and released again and the error is returned.
- *
- * The error path drops two references per page: the one taken by its own
- * find_get_page() lookup and the one grab_cache_page() took earlier.
- * The second reference used to be leaked.
- *
- * NOTE(review): on success the grab_cache_page() references are still
- * held on return and the result of lock_extent() is ignored -- confirm
- * what callers expect before changing either.
- */
-int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
-{
-	unsigned long first_index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	unsigned long index;
-	struct page *page;
-	int err;
-
-	for (index = first_index; index <= end_index; index++) {
-		page = grab_cache_page(tree->mapping, index);
-		if (!page) {
-			err = -ENOMEM;
-			goto failed;
-		}
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto failed;
-		}
-	}
-	lock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-
-failed:
-	/*
-	 * we failed above in getting the page at 'index', so we undo here
-	 * up to but not including the page at 'index'
-	 */
-	end_index = index;
-	for (index = first_index; index < end_index; index++) {
-		page = find_get_page(tree->mapping, index);
-		if (!page)
-			continue;
-		unlock_page(page);
-		/* once for the lookup above ... */
-		page_cache_release(page);
-		/* ... and once for the reference grab_cache_page() took */
-		page_cache_release(page);
-	}
-	return err;
-}
-EXPORT_SYMBOL(lock_range);
-
-/*
- * Unlock every page cache page covering [start, end], then clear
- * EXTENT_LOCKED on the same byte range in the extent tree.
- * Always returns 0.
- */
-int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
-{
-	unsigned long last = end >> PAGE_CACHE_SHIFT;
-	unsigned long idx;
-
-	for (idx = start >> PAGE_CACHE_SHIFT; idx <= last; idx++) {
-		struct page *pg = find_get_page(tree->mapping, idx);
-
-		unlock_page(pg);
-		page_cache_release(pg);
-	}
-
-	unlock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-}
-EXPORT_SYMBOL(unlock_range);
-
-/*
- * Store 'private' in the state that begins exactly at 'start'.
- *
- * Returns 0 on success, or -ENOENT when the search finds nothing or the
- * state it finds does not begin at 'start'.  The tree write lock is
- * taken here.
- */
-int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
-{
-	struct extent_state *es;
-	struct rb_node *found;
-	int ret = -ENOENT;
-
-	write_lock_irq(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	found = tree_search(&tree->state, start);
-	if (found && !IS_ERR(found)) {
-		es = rb_entry(found, struct extent_state, rb_node);
-		if (es->start == start) {
-			es->private = private;
-			ret = 0;
-		}
-	}
-	write_unlock_irq(&tree->lock);
-	return ret;
-}
-
-/*
- * Read the private value of the state that begins exactly at 'start'
- * into *private.
- *
- * Returns 0 on success, or -ENOENT (leaving *private untouched) when the
- * search finds nothing or the state it finds does not begin at 'start'.
- * The tree read lock is taken here.
- */
-int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
-{
-	struct extent_state *es;
-	struct rb_node *found;
-	int ret = -ENOENT;
-
-	read_lock_irq(&tree->lock);
-	/*
-	 * this search will find all the extents that end after
-	 * our range starts.
-	 */
-	found = tree_search(&tree->state, start);
-	if (found && !IS_ERR(found)) {
-		es = rb_entry(found, struct extent_state, rb_node);
-		if (es->start == start) {
-			*private = es->private;
-			ret = 0;
-		}
-	}
-	read_unlock_irq(&tree->lock);
-	return ret;
-}
-
-/*
- * searches a range in the state tree for a given mask.
- * If 'filled' == 1, this returns 1 only if every extent in the range
- * has the bits set and the states cover the range without a gap.
- * Otherwise, 1 is returned if any bit in the range is found set.
- *
- * [start, end] is inclusive.  The tree read lock is taken here.
- */
-int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
-		   int bits, int filled)
-{
-	struct extent_state *es = NULL;
-	struct rb_node *cursor;
-	int result = 0;
-
-	read_lock_irq(&tree->lock);
-	cursor = tree_search(&tree->state, start);
-	while (cursor && start <= end) {
-		es = rb_entry(cursor, struct extent_state, rb_node);
-
-		/* a hole in front of this state fails a 'filled' query */
-		if (filled && es->start > start) {
-			result = 0;
-			break;
-		}
-
-		if (es->start > end)
-			break;
-
-		if (!(es->state & bits)) {
-			if (filled) {
-				result = 0;
-				break;
-			}
-		} else {
-			result = 1;
-			if (!filled)
-				break;
-		}
-
-		start = es->end + 1;
-		if (start > end)
-			break;
-
-		/* running out of states before 'end' also fails 'filled' */
-		cursor = rb_next(cursor);
-		if (!cursor && filled)
-			result = 0;
-	}
-	read_unlock_irq(&tree->lock);
-	return result;
-}
-EXPORT_SYMBOL(test_range_bit);
-
-/*
- * Mark the page up to date when the whole byte range it covers has
- * EXTENT_UPTODATE set in the tree.  Always returns 0.
- */
-static int check_page_uptodate(struct extent_map_tree *tree,
-			       struct page *page)
-{
-	u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
-	u64 last = first + PAGE_CACHE_SIZE - 1;
-	int all_uptodate;
-
-	all_uptodate = test_range_bit(tree, first, last, EXTENT_UPTODATE, 1);
-	if (all_uptodate)
-		SetPageUptodate(page);
-	return 0;
-}
-
-/*
- * Unlock the page when no part of the byte range it covers still has
- * EXTENT_LOCKED set in the tree.  Always returns 0.
- */
-static int check_page_locked(struct extent_map_tree *tree,
-			     struct page *page)
-{
-	u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
-	u64 last = first + PAGE_CACHE_SIZE - 1;
-	int any_locked;
-
-	any_locked = test_range_bit(tree, first, last, EXTENT_LOCKED, 0);
-	if (!any_locked)
-		unlock_page(page);
-	return 0;
-}
-
-/*
- * End writeback on the page when no part of the byte range it covers
- * still has EXTENT_WRITEBACK set in the tree.  Always returns 0.
- */
-static int check_page_writeback(struct extent_map_tree *tree,
-				struct page *page)
-{
-	u64 first = (u64)page->index << PAGE_CACHE_SHIFT;
-	u64 last = first + PAGE_CACHE_SIZE - 1;
-	int any_writeback;
-
-	any_writeback = test_range_bit(tree, first, last, EXTENT_WRITEBACK, 0);
-	if (!any_writeback)
-		end_page_writeback(page);
-	return 0;
-}
-
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-/*
- * bio completion handler for writepage IO.  For every segment of the
- * bio, walking from the last one to the first:
- *   - on error, clear the uptodate bits and flag the page
- *   - clear the writeback bits in the extent tree for this IO
- *   - end_page_writeback if the page has no more pending IO
- *   - call the writepage_end_io_hook, if the tree has one
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_writepage(struct bio *bio, int err)
-#else
-static int end_bio_extent_writepage(struct bio *bio,
-				    unsigned int bytes_done, int err)
-#endif
-{
-	const int io_ok = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct extent_map_tree *tree = bio->bi_private;
-	struct bio_vec *vec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	if (bio->bi_size)
-		return 1;
-#endif
-
-	do {
-		struct page *page = vec->bv_page;
-		u64 first = ((u64)page->index << PAGE_CACHE_SHIFT) +
-			    vec->bv_offset;
-		u64 last = first + vec->bv_len - 1;
-		int full_page = (vec->bv_offset == 0 &&
-				 vec->bv_len == PAGE_CACHE_SIZE);
-
-		/* step to the previous segment and warm up its page flags */
-		if (--vec >= bio->bi_io_vec)
-			prefetchw(&vec->bv_page->flags);
-
-		if (!io_ok) {
-			clear_extent_uptodate(tree, first, last, GFP_ATOMIC);
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
-		clear_extent_writeback(tree, first, last, GFP_ATOMIC);
-
-		if (full_page)
-			end_page_writeback(page);
-		else
-			check_page_writeback(tree, page);
-
-		if (tree->ops && tree->ops->writepage_end_io_hook)
-			tree->ops->writepage_end_io_hook(page, first, last);
-	} while (vec >= bio->bi_io_vec);
-
-	bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	return 0;
-#endif
-}
-
-/*
- * bio completion handler for readpage IO.  For every segment of the
- * bio, walking from the last one to the first:
- *   - run the readpage_end_io_hook (if any); a non-zero result turns the
- *     IO into a failure for this and all remaining segments
- *   - set the uptodate bits if things worked, and set the page up to
- *     date if all extents in the tree for it are uptodate
- *   - on failure, clear the page uptodate flag and flag the error
- *   - clear the lock bit in the extent tree
- *   - unlock the page if there are no other extents locked for it
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_readpage(struct bio *bio, int err)
-#else
-static int end_bio_extent_readpage(struct bio *bio,
-				   unsigned int bytes_done, int err)
-#endif
-{
-	int io_ok = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct extent_map_tree *tree = bio->bi_private;
-	struct bio_vec *vec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	int hook_ret;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	if (bio->bi_size)
-		return 1;
-#endif
-
-	do {
-		struct page *page = vec->bv_page;
-		u64 first = ((u64)page->index << PAGE_CACHE_SHIFT) +
-			    vec->bv_offset;
-		u64 last = first + vec->bv_len - 1;
-		int full_page = (vec->bv_offset == 0 &&
-				 vec->bv_len == PAGE_CACHE_SIZE);
-
-		/* step to the previous segment and warm up its page flags */
-		if (--vec >= bio->bi_io_vec)
-			prefetchw(&vec->bv_page->flags);
-
-		if (io_ok && tree->ops && tree->ops->readpage_end_io_hook) {
-			hook_ret = tree->ops->readpage_end_io_hook(page, first,
-								   last);
-			if (hook_ret)
-				io_ok = 0;
-		}
-
-		if (!io_ok) {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		} else {
-			set_extent_uptodate(tree, first, last, GFP_ATOMIC);
-			if (full_page)
-				SetPageUptodate(page);
-			else
-				check_page_uptodate(tree, page);
-		}
-
-		unlock_extent(tree, first, last, GFP_ATOMIC);
-
-		if (full_page)
-			unlock_page(page);
-		else
-			check_page_locked(tree, page);
-	} while (vec >= bio->bi_io_vec);
-
-	bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	return 0;
-#endif
-}
-
-/*
- * bio completion handler for IO started from prepare_write.  For every
- * segment, walking from the last one to the first, the uptodate bits are
- * set in the extent tree on success (or the page is flagged on error)
- * and the range is unlocked in the extent tree.  The page lock itself is
- * not touched here.
- */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-#else
-static int end_bio_extent_preparewrite(struct bio *bio,
-				       unsigned int bytes_done, int err)
-#endif
-{
-	const int io_ok = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct extent_map_tree *tree = bio->bi_private;
-	struct bio_vec *vec = bio->bi_io_vec + bio->bi_vcnt - 1;
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	if (bio->bi_size)
-		return 1;
-#endif
-
-	do {
-		struct page *page = vec->bv_page;
-		u64 first = ((u64)page->index << PAGE_CACHE_SHIFT) +
-			    vec->bv_offset;
-		u64 last = first + vec->bv_len - 1;
-
-		/* step to the previous segment and warm up its page flags */
-		if (--vec >= bio->bi_io_vec)
-			prefetchw(&vec->bv_page->flags);
-
-		if (!io_ok) {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		} else {
-			set_extent_uptodate(tree, first, last, GFP_ATOMIC);
-		}
-
-		unlock_extent(tree, first, last, GFP_ATOMIC);
-	} while (vec >= bio->bi_io_vec);
-
-	bio_put(bio);
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
-	return 0;
-#endif
-}
-
-/*
- * Allocate a bio with room for nr_vecs segments and point it at
- * bdev / first_sector.
- *
- * If the allocation fails while the current task has PF_MEMALLOC set,
- * retry with nr_vecs halved each time until an allocation succeeds or
- * nr_vecs reaches zero.  Returns NULL when no bio could be allocated.
- */
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-		 gfp_t gfp_flags)
-{
-	struct bio *bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (!bio && (current->flags & PF_MEMALLOC)) {
-		for (nr_vecs /= 2; nr_vecs; nr_vecs /= 2) {
-			bio = bio_alloc(gfp_flags, nr_vecs);
-			if (bio)
-				break;
-		}
-	}
-
-	if (!bio)
-		return NULL;
-
-	bio->bi_bdev = bdev;
-	bio->bi_sector = first_sector;
-	return bio;
-}
-
-/*
- * Submit a bio to the block layer.
- *
- * An extra reference is held across submit_bio() so the bio flags can be
- * inspected afterwards.  A warning is printed when the starting sector
- * lies beyond the size of the block device, but the bio is submitted
- * anyway.  Returns -EOPNOTSUPP when the bio comes back flagged
- * BIO_EOPNOTSUPP, 0 otherwise.
- */
-static int submit_one_bio(int rw, struct bio *bio)
-{
-	u64 dev_sectors;
-	int ret;
-
-	bio_get(bio);
-
-	dev_sectors = bio->bi_bdev->bd_inode->i_size >> 9;
-	if (dev_sectors < bio->bi_sector) {
-		printk("sector too large max %Lu got %llu\n", dev_sectors,
-		       (unsigned long long)bio->bi_sector);
-		WARN_ON(1);
-	}
-
-	submit_bio(rw, bio);
-	ret = bio_flagged(bio, BIO_EOPNOTSUPP) ? -EOPNOTSUPP : 0;
-	bio_put(bio);
-	return ret;
-}
-
-/*
- * Queue 'size' bytes of 'page' (starting at 'offset' within the page)
- * for IO at 'sector' on 'bdev'.
- *
- * When 'bio_ret' is non-NULL it carries a bio being built up across
- * calls: the page is appended to *bio_ret when it is contiguous on disk
- * and still fits; otherwise that bio is submitted and a new one is
- * started and stored in *bio_ret.  When 'bio_ret' is NULL the new bio is
- * submitted right away.
- *
- * 'max_pages' caps the number of segments requested for a new bio, and
- * 'end_io_func' becomes its completion handler with 'tree' as private
- * data.
- *
- * Returns 0 on success, the result of submit_one_bio() when a bio had to
- * be submitted, or -ENOMEM when no new bio could be allocated.  In the
- * -ENOMEM case *bio_ret is cleared, because the bio it pointed at may
- * already have been submitted above and must not be submitted again.
- * This function used to dereference the NULL bio in that case.
- */
-static int submit_extent_page(int rw, struct extent_map_tree *tree,
-			      struct page *page, sector_t sector,
-			      size_t size, unsigned long offset,
-			      struct block_device *bdev,
-			      struct bio **bio_ret,
-			      unsigned long max_pages,
-			      bio_end_io_t end_io_func)
-{
-	int ret = 0;
-	struct bio *bio;
-	int nr;
-
-	if (bio_ret && *bio_ret) {
-		bio = *bio_ret;
-		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
-		    bio_add_page(bio, page, size, offset) < size) {
-			/* not contiguous or full: send it and start over */
-			ret = submit_one_bio(rw, bio);
-			bio = NULL;
-		} else {
-			return 0;
-		}
-	}
-	nr = min_t(int, max_pages, bio_get_nr_vecs(bdev));
-	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
-	if (!bio) {
-		printk("failed to allocate bio nr %d\n", nr);
-		if (bio_ret)
-			*bio_ret = NULL;
-		return -ENOMEM;
-	}
-	bio_add_page(bio, page, size, offset);
-	bio->bi_end_io = end_io_func;
-	bio->bi_private = tree;
-	if (bio_ret) {
-		*bio_ret = bio;
-	} else {
-		ret = submit_one_bio(rw, bio);
-	}
-
-	return ret;
-}
-
-/*
- * Tag a page as managed by the extent code: set PagePrivate, store
- * EXTENT_PAGE_PRIVATE in page->private and take a page reference.
- * Does nothing when PagePrivate is already set.
- */
-void set_page_extent_mapped(struct page *page)
-{
-	if (PagePrivate(page))
-		return;
-
-	SetPagePrivate(page);
-	WARN_ON(!page->mapping->a_ops->invalidatepage);
-	set_page_private(page, EXTENT_PAGE_PRIVATE);
-	page_cache_get(page);
-}
-
-/*
- * Store EXTENT_PAGE_PRIVATE_FIRST_PAGE combined with 'len' shifted left
- * by two bits in page->private.
- */
-void set_page_extent_head(struct page *page, unsigned long len)
-{
-	unsigned long tag = EXTENT_PAGE_PRIVATE_FIRST_PAGE | (len << 2);
-
-	set_page_private(page, tag);
-}
-
-/*
- * basic readpage implementation. Locked extent state structs are inserted
- * into the tree that are removed when the IO is done (by the end_io
- * handlers)
- *
- * The whole byte range of the page is locked in the tree first, then the
- * page is walked one extent at a time using get_extent():
- *   - bytes at or past i_size are zeroed, marked uptodate and unlocked
- *   - holes are zeroed, marked uptodate and unlocked
- *   - ranges already uptodate in the tree are just unlocked
- *   - everything else goes through the readpage_io_hook (if any) and is
- *     queued with submit_extent_page(), completing in
- *     end_bio_extent_readpage()
- *
- * '*bio' carries a bio being built across calls; the caller submits
- * whatever is left in it.
- *
- * If no IO was queued the page is unlocked here, and marked uptodate
- * unless an error was flagged on it.  Always returns 0; errors are
- * reported through SetPageError().
- *
- * NOTE(review): 'nr' is incremented even when the hook or the submit
- * failed, in which case the page is not unlocked here -- confirm that
- * something else unlocks it on that path.
- */
-static int __extent_read_full_page(struct extent_map_tree *tree,
- struct page *page,
- get_extent_t *get_extent,
- struct bio **bio)
-{
- struct inode *inode = page->mapping->host;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
- u64 end;
- u64 cur = start;
- u64 extent_offset;
- u64 last_byte = i_size_read(inode);
- u64 block_start;
- u64 cur_end;
- sector_t sector;
- struct extent_map *em;
- struct block_device *bdev;
- int ret;
- int nr = 0; /* passes through the IO path at the bottom of the loop */
- size_t page_offset = 0; /* advanced in step with 'cur' */
- size_t iosize;
- size_t blocksize = inode->i_sb->s_blocksize;
-
- set_page_extent_mapped(page);
-
- end = page_end;
- lock_extent(tree, start, end, GFP_NOFS);
-
- while (cur <= end) {
- if (cur >= last_byte) {
- char *userpage;
- iosize = PAGE_CACHE_SIZE - page_offset;
- userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + page_offset, 0, iosize);
- flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
- set_extent_uptodate(tree, cur, cur + iosize - 1,
- GFP_NOFS);
- unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
- break;
- }
- em = get_extent(inode, page, page_offset, cur, end, 0);
- if (IS_ERR(em) || !em) {
- SetPageError(page);
- unlock_extent(tree, cur, end, GFP_NOFS);
- break;
- }
-
- extent_offset = cur - em->start;
- BUG_ON(em->end < cur);
- BUG_ON(end < cur);
-
- iosize = min(em->end - cur, end - cur) + 1;
- cur_end = min(em->end, end);
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); /* round up to a multiple of blocksize */
- sector = (em->block_start + extent_offset) >> 9;
- bdev = em->bdev;
- block_start = em->block_start;
- free_extent_map(em); /* everything needed from em was copied above */
- em = NULL;
-
- /* we've found a hole, just zero and go on */
- if (block_start == EXTENT_MAP_HOLE) {
- char *userpage;
- userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + page_offset, 0, iosize);
- flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
-
- set_extent_uptodate(tree, cur, cur + iosize - 1,
- GFP_NOFS);
- unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
- cur = cur + iosize;
- page_offset += iosize;
- continue;
- }
- /* the get_extent function already copied into the page */
- if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
- unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
- cur = cur + iosize;
- page_offset += iosize;
- continue;
- }
-
- ret = 0;
- if (tree->ops && tree->ops->readpage_io_hook) {
- ret = tree->ops->readpage_io_hook(page, cur,
- cur + iosize - 1);
- }
- if (!ret) {
- unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; /* shadows the outer 'nr'; used as max_pages below */
- nr -= page->index;
- ret = submit_extent_page(READ, tree, page,
- sector, iosize, page_offset,
- bdev, bio, nr,
- end_bio_extent_readpage);
- }
- if (ret)
- SetPageError(page);
- cur = cur + iosize;
- page_offset += iosize;
- nr++; /* the outer counter */
- }
- if (!nr) {
- if (!PageError(page))
- SetPageUptodate(page);
- unlock_page(page);
- }
- return 0;
-}
-
-/*
- * Read one full page: run __extent_read_full_page() and submit whatever
- * bio it left pending.  Returns the result of __extent_read_full_page().
- */
-int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
-			  get_extent_t *get_extent)
-{
-	struct bio *pending = NULL;
-	int ret = __extent_read_full_page(tree, page, get_extent, &pending);
-
-	if (pending != NULL)
-		submit_one_bio(READ, pending);
-	return ret;
-}
-EXPORT_SYMBOL(extent_read_full_page);
-
-/*
- * the writepage semantics are similar to regular writepage. extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback. Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
- *
- * 'data' is a struct extent_page_data supplying the tree, the
- * get_extent callback and the bio being built across pages.
- *
- * Steps, in order:
- *   - a page entirely past i_size only has its dirty bits cleared
- *   - the tail of the page containing i_size is zeroed
- *   - delalloc ranges starting at this page are locked and handed to
- *     tree->ops->fill_delalloc(), then unlocked with the delalloc bit
- *     cleared
- *   - the page range is locked and walked one extent at a time; holes
- *     and inline extents only have their dirty bits cleared, everything
- *     else is marked writeback and queued with submit_extent_page(),
- *     completing in end_bio_extent_writepage()
- *
- * NOTE(review): tree->ops is dereferenced without a NULL check for
- * fill_delalloc, unlike the hook calls further down.
- *
- * The page is unlocked before returning.  Always returns 0; errors are
- * reported through SetPageError().
- */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
-{
- struct inode *inode = page->mapping->host;
- struct extent_page_data *epd = data;
- struct extent_map_tree *tree = epd->tree;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 delalloc_start;
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
- u64 end;
- u64 cur = start;
- u64 extent_offset;
- u64 last_byte = i_size_read(inode);
- u64 block_start;
- u64 iosize;
- sector_t sector;
- struct extent_map *em;
- struct block_device *bdev;
- int ret;
- int nr = 0; /* passes through the IO path at the bottom of the loop */
- size_t page_offset = 0; /* advanced in step with 'cur' */
- size_t blocksize;
- loff_t i_size = i_size_read(inode);
- unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
- u64 nr_delalloc;
- u64 delalloc_end;
-
- WARN_ON(!PageLocked(page));
- if (page->index > end_index) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
- unlock_page(page);
- return 0;
- }
-
- if (page->index == end_index) {
- char *userpage;
-
- size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
-
- userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap_atomic(userpage, KM_USER0);
- }
-
- set_page_extent_mapped(page);
-
- delalloc_start = start;
- delalloc_end = 0;
- while(delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
- &delalloc_end,
- 128 * 1024 * 1024);
- if (nr_delalloc == 0) {
- delalloc_start = delalloc_end + 1; /* nothing locked: skip past what was found */
- continue;
- }
- tree->ops->fill_delalloc(inode, delalloc_start,
- delalloc_end);
- clear_extent_bit(tree, delalloc_start,
- delalloc_end,
- EXTENT_LOCKED | EXTENT_DELALLOC,
- 1, 0, GFP_NOFS);
- delalloc_start = delalloc_end + 1;
- }
- lock_extent(tree, start, page_end, GFP_NOFS);
-
- end = page_end;
- if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
- printk("found delalloc bits after lock_extent\n");
- }
-
- if (last_byte <= start) {
- clear_extent_dirty(tree, start, page_end, GFP_NOFS);
- goto done;
- }
-
- set_extent_uptodate(tree, start, page_end, GFP_NOFS);
- blocksize = inode->i_sb->s_blocksize;
-
- while (cur <= end) {
- if (cur >= last_byte) {
- clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
- break;
- }
- em = epd->get_extent(inode, page, page_offset, cur, end, 1);
- if (IS_ERR(em) || !em) {
- SetPageError(page);
- break;
- }
-
- extent_offset = cur - em->start;
- BUG_ON(em->end < cur);
- BUG_ON(end < cur);
- iosize = min(em->end - cur, end - cur) + 1;
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); /* round up to a multiple of blocksize */
- sector = (em->block_start + extent_offset) >> 9;
- bdev = em->bdev;
- block_start = em->block_start;
- free_extent_map(em); /* everything needed from em was copied above */
- em = NULL;
-
- if (block_start == EXTENT_MAP_HOLE ||
- block_start == EXTENT_MAP_INLINE) {
- clear_extent_dirty(tree, cur,
- cur + iosize - 1, GFP_NOFS);
- cur = cur + iosize;
- page_offset += iosize;
- continue;
- }
-
- /* leave this out until we have a page_mkwrite call */
- if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0)) {
- cur = cur + iosize;
- page_offset += iosize;
- continue;
- }
- clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
- if (tree->ops && tree->ops->writepage_io_hook) {
- ret = tree->ops->writepage_io_hook(page, cur,
- cur + iosize - 1);
- } else {
- ret = 0;
- }
- if (ret)
- SetPageError(page);
- else {
- unsigned long max_nr = end_index + 1;
- set_range_writeback(tree, cur, cur + iosize - 1);
- if (!PageWriteback(page)) {
- printk("warning page %lu not writeback, "
- "cur %llu end %llu\n", page->index,
- (unsigned long long)cur,
- (unsigned long long)end);
- }
-
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, page_offset, bdev,
- &epd->bio, max_nr,
- end_bio_extent_writepage);
- if (ret)
- SetPageError(page);
- }
- cur = cur + iosize;
- page_offset += iosize;
- nr++;
- }
-done:
- if (nr == 0) {
- /* make sure the mapping tag for page dirty gets cleared */
- set_page_writeback(page);
- end_page_writeback(page);
- }
- unlock_extent(tree, start, page_end, GFP_NOFS);
- unlock_page(page);
- return 0;
-}
-
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-
-/* Taken directly from 2.6.23 for 2.6.18 back port */
-typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
- void *data);
-
-/**
- * write_cache_pages - walk the list of dirty pages of the given address space
- * and write all of them.
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @writepage: function called for each page
- * @data: data passed to writepage function
- *
- * If a page is already under I/O, write_cache_pages() skips it, even
- * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them. If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
- */
-static int write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc, writepage_t writepage,
- void *data)
-{
- struct backing_dev_info *bdi = mapping->backing_dev_info;
- int ret = 0;
- int done = 0;
- struct pagevec pvec;
- int nr_pages;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
- int scanned = 0;
- int range_whole = 0;
-
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- return 0;
- }
-
- pagevec_init(&pvec, 0);
- if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
- end = -1;
- } else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
- if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
- range_whole = 1;
- scanned = 1;
- }
-retry:
- while (!done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
- unsigned i;
-
- scanned = 1;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /*
- * At this point we hold neither mapping->tree_lock nor
- * lock on the page itself: the page may be truncated or
- * invalidated (changing page->mapping to NULL), or even
- * swizzled back from swapper_space to tmpfs file
- * mapping
- */
- lock_page(page);
-
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- unlock_page(page);
- continue;
- }
-
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
-
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
- continue;
- }
-
- ret = (*writepage)(page, wbc, data);
-
- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
- unlock_page(page);
- ret = 0;
- }
- if (ret || (--(wbc->nr_to_write) <= 0))
- done = 1;
- if (wbc->nonblocking && bdi_write_congested(bdi)) {
- wbc->encountered_congestion = 1;
- done = 1;
- }
- }
- pagevec_release(&pvec);
- cond_resched();
- }
- if (!scanned && !done) {
- /*
- * We hit the last page and there is more work to be done: wrap
- * back to the start of the file
- */
- scanned = 1;
- index = 0;
- goto retry;
- }
- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
- mapping->writeback_index = index;
- return ret;
-}
-#endif
-
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
- get_extent_t *get_extent,
- struct writeback_control *wbc)
-{
- int ret;
- struct address_space *mapping = page->mapping;
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .get_extent = get_extent,
- };
- struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
- .sync_mode = WB_SYNC_NONE,
- .older_than_this = NULL,
- .nr_to_write = 64,
- .range_start = page_offset(page) + PAGE_CACHE_SIZE,
- .range_end = (loff_t)-1,
- };
-
-
- ret = __extent_writepage(page, wbc, &epd);
-
- write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
- if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
- }
- return ret;
-}
-EXPORT_SYMBOL(extent_write_full_page);
-
-
-int extent_writepages(struct extent_map_tree *tree,
- struct address_space *mapping,
- get_extent_t *get_extent,
- struct writeback_control *wbc)
-{
int ret = 0;
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .get_extent = get_extent,
- };
-
- ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
- if (epd.bio) {
- submit_one_bio(WRITE, epd.bio);
- }
- return ret;
-}
-EXPORT_SYMBOL(extent_writepages);
-
-int extent_readpages(struct extent_map_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent)
-{
- struct bio *bio = NULL;
- unsigned page_idx;
- struct pagevec pvec;
-
- pagevec_init(&pvec, 0);
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- /*
- * what we want to do here is call add_to_page_cache_lru,
- * but that isn't exported, so we reproduce it here
- */
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
-
- /* open coding of lru_cache_add, also not exported */
- page_cache_get(page);
- if (!pagevec_add(&pvec, page))
- __pagevec_lru_add(&pvec);
- __extent_read_full_page(tree, page, get_extent, &bio);
- }
- page_cache_release(page);
- }
- if (pagevec_count(&pvec))
- __pagevec_lru_add(&pvec);
- BUG_ON(!list_empty(pages));
- if (bio)
- submit_one_bio(READ, bio);
- return 0;
-}
-EXPORT_SYMBOL(extent_readpages);
-
-/*
- * basic invalidatepage code, this waits on any locked or writeback
- * ranges corresponding to the page, and then deletes any extent state
- * records from the tree
- */
-int extent_invalidatepage(struct extent_map_tree *tree,
- struct page *page, unsigned long offset)
-{
- u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
- u64 end = start + PAGE_CACHE_SIZE - 1;
- size_t blocksize = page->mapping->host->i_sb->s_blocksize;
-
- start += (offset + blocksize -1) & ~(blocksize - 1);
- if (start > end)
- return 0;
-
- lock_extent(tree, start, end, GFP_NOFS);
- wait_on_extent_writeback(tree, start, end);
- clear_extent_bit(tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
- 1, 1, GFP_NOFS);
- return 0;
-}
-EXPORT_SYMBOL(extent_invalidatepage);
-
-/*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_map_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to)
-{
- loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
- set_page_extent_mapped(page);
- set_page_dirty(page);
-
- if (pos > inode->i_size) {
- i_size_write(inode, pos);
- mark_inode_dirty(inode);
- }
- return 0;
-}
-EXPORT_SYMBOL(extent_commit_write);
-
-int extent_prepare_write(struct extent_map_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to, get_extent_t *get_extent)
-{
- u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
- u64 block_start;
- u64 orig_block_start;
- u64 block_end;
- u64 cur_end;
- struct extent_map *em;
- unsigned blocksize = 1 << inode->i_blkbits;
- size_t page_offset = 0;
- size_t block_off_start;
- size_t block_off_end;
- int err = 0;
- int iocount = 0;
- int ret = 0;
- int isnew;
-
- set_page_extent_mapped(page);
-
- block_start = (page_start + from) & ~((u64)blocksize - 1);
- block_end = (page_start + to - 1) | (blocksize - 1);
- orig_block_start = block_start;
-
- lock_extent(tree, page_start, page_end, GFP_NOFS);
- while(block_start <= block_end) {
- em = get_extent(inode, page, page_offset, block_start,
- block_end, 1);
- if (IS_ERR(em) || !em) {
- goto err;
- }
- cur_end = min(block_end, em->end);
- block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
- block_off_end = block_off_start + blocksize;
- isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
- if (!PageUptodate(page) && isnew &&
- (block_off_end > to || block_off_start < from)) {
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- if (block_off_end > to)
- memset(kaddr + to, 0, block_off_end - to);
- if (block_off_start < from)
- memset(kaddr + block_off_start, 0,
- from - block_off_start);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
- if ((em->block_start != EXTENT_MAP_HOLE &&
- em->block_start != EXTENT_MAP_INLINE) &&
- !isnew && !PageUptodate(page) &&
- (block_off_end > to || block_off_start < from) &&
- !test_range_bit(tree, block_start, cur_end,
- EXTENT_UPTODATE, 1)) {
- u64 sector;
- u64 extent_offset = block_start - em->start;
- size_t iosize;
- sector = (em->block_start + extent_offset) >> 9;
- iosize = (cur_end - block_start + blocksize) &
- ~((u64)blocksize - 1);
- /*
- * we've already got the extent locked, but we
- * need to split the state such that our end_bio
- * handler can clear the lock.
- */
- set_extent_bit(tree, block_start,
- block_start + iosize - 1,
- EXTENT_LOCKED, 0, NULL, GFP_NOFS);
- ret = submit_extent_page(READ, tree, page,
- sector, iosize, page_offset, em->bdev,
- NULL, 1,
- end_bio_extent_preparewrite);
- iocount++;
- block_start = block_start + iosize;
- } else {
- set_extent_uptodate(tree, block_start, cur_end,
- GFP_NOFS);
- unlock_extent(tree, block_start, cur_end, GFP_NOFS);
- block_start = cur_end + 1;
- }
- page_offset = block_start & (PAGE_CACHE_SIZE - 1);
- free_extent_map(em);
- }
- if (iocount) {
- wait_extent_bit(tree, orig_block_start,
- block_end, EXTENT_LOCKED);
- }
- check_page_uptodate(tree, page);
-err:
- /* FIXME, zero out newly allocated blocks on error */
- return err;
-}
-EXPORT_SYMBOL(extent_prepare_write);
-
-/*
- * a helper for releasepage. As long as there are no locked extents
- * in the range corresponding to the page, both state records and extent
- * map records are removed
- */
-int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
-{
- struct extent_map *em;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- u64 orig_start = start;
- int ret = 1;
-
- while (start <= end) {
- em = lookup_extent_mapping(tree, start, end);
- if (!em || IS_ERR(em))
- break;
- if (!test_range_bit(tree, em->start, em->end,
- EXTENT_LOCKED, 0)) {
- remove_extent_mapping(tree, em);
- /* once for the rb tree */
- free_extent_map(em);
- }
- start = em->end + 1;
- /* once for us */
- free_extent_map(em);
- }
- if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
- ret = 0;
- else
- clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
- 1, 1, GFP_NOFS);
- return ret;
-}
-EXPORT_SYMBOL(try_release_extent_mapping);
-
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
- get_extent_t *get_extent)
-{
- struct inode *inode = mapping->host;
- u64 start = iblock << inode->i_blkbits;
- u64 end = start + (1 << inode->i_blkbits) - 1;
- sector_t sector = 0;
- struct extent_map *em;
-
- em = get_extent(inode, NULL, 0, start, end, 0);
- if (!em || IS_ERR(em))
- return 0;
-
- if (em->block_start == EXTENT_MAP_INLINE ||
- em->block_start == EXTENT_MAP_HOLE)
- goto out;
-
- sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
- free_extent_map(em);
- return sector;
-}
-
-static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
-{
- if (list_empty(&eb->lru)) {
- extent_buffer_get(eb);
- list_add(&eb->lru, &tree->buffer_lru);
- tree->lru_size++;
- if (tree->lru_size >= BUFFER_LRU_MAX) {
- struct extent_buffer *rm;
- rm = list_entry(tree->buffer_lru.prev,
- struct extent_buffer, lru);
- tree->lru_size--;
- list_del_init(&rm->lru);
- free_extent_buffer(rm);
- }
- } else
- list_move(&eb->lru, &tree->buffer_lru);
- return 0;
-}
-static struct extent_buffer *find_lru(struct extent_map_tree *tree,
- u64 start, unsigned long len)
-{
- struct list_head *lru = &tree->buffer_lru;
- struct list_head *cur = lru->next;
- struct extent_buffer *eb;
-
- if (list_empty(lru))
- return NULL;
-
- do {
- eb = list_entry(cur, struct extent_buffer, lru);
- if (eb->start == start && eb->len == len) {
- extent_buffer_get(eb);
- return eb;
- }
- cur = cur->next;
- } while (cur != lru);
- return NULL;
-}
-
-static inline unsigned long num_extent_pages(u64 start, u64 len)
-{
- return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
- (start >> PAGE_CACHE_SHIFT);
-}
-
-static inline struct page *extent_buffer_page(struct extent_buffer *eb,
- unsigned long i)
-{
- struct page *p;
- struct address_space *mapping;
-
- if (i == 0)
- return eb->first_page;
- i += eb->start >> PAGE_CACHE_SHIFT;
- mapping = eb->first_page->mapping;
- read_lock_irq(&mapping->tree_lock);
- p = radix_tree_lookup(&mapping->page_tree, i);
- read_unlock_irq(&mapping->tree_lock);
- return p;
-}
-
-static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
- u64 start,
- unsigned long len,
- gfp_t mask)
-{
- struct extent_buffer *eb = NULL;
- spin_lock(&tree->lru_lock);
- eb = find_lru(tree, start, len);
- spin_unlock(&tree->lru_lock);
- if (eb) {
- return eb;
- }
-
- eb = kmem_cache_zalloc(extent_buffer_cache, mask);
- INIT_LIST_HEAD(&eb->lru);
- eb->start = start;
- eb->len = len;
- atomic_set(&eb->refs, 1);
-
- return eb;
-}
-
-static void __free_extent_buffer(struct extent_buffer *eb)
-{
- kmem_cache_free(extent_buffer_cache, eb);
-}
-
-struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
- u64 start, unsigned long len,
- struct page *page0,
- gfp_t mask)
-{
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct extent_buffer *eb;
- struct page *p;
- struct address_space *mapping = tree->mapping;
- int uptodate = 1;
-
- eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
- return NULL;
-
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
-
- if (page0) {
- eb->first_page = page0;
- i = 1;
- index++;
- page_cache_get(page0);
- mark_page_accessed(page0);
- set_page_extent_mapped(page0);
- WARN_ON(!PageUptodate(page0));
- set_page_extent_head(page0, len);
- } else {
- i = 0;
- }
- for (; i < num_pages; i++, index++) {
- p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
- if (!p) {
- WARN_ON(1);
- goto fail;
- }
- set_page_extent_mapped(p);
- mark_page_accessed(p);
- if (i == 0) {
- eb->first_page = p;
- set_page_extent_head(p, len);
- } else {
- set_page_private(p, EXTENT_PAGE_PRIVATE);
- }
- if (!PageUptodate(p))
- uptodate = 0;
- unlock_page(p);
- }
- if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
-
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
- return eb;
-
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
- if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
- page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
- return NULL;
-}
-EXPORT_SYMBOL(alloc_extent_buffer);
-
-struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
- u64 start, unsigned long len,
- gfp_t mask)
-{
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct extent_buffer *eb;
- struct page *p;
- struct address_space *mapping = tree->mapping;
- int uptodate = 1;
-
- eb = __alloc_extent_buffer(tree, start, len, mask);
- if (!eb || IS_ERR(eb))
- return NULL;
-
- if (eb->flags & EXTENT_BUFFER_FILLED)
- goto lru_add;
-
- for (i = 0; i < num_pages; i++, index++) {
- p = find_lock_page(mapping, index);
- if (!p) {
- goto fail;
- }
- set_page_extent_mapped(p);
- mark_page_accessed(p);
-
- if (i == 0) {
- eb->first_page = p;
- set_page_extent_head(p, len);
- } else {
- set_page_private(p, EXTENT_PAGE_PRIVATE);
- }
-
- if (!PageUptodate(p))
- uptodate = 0;
- unlock_page(p);
- }
- if (uptodate)
- eb->flags |= EXTENT_UPTODATE;
- eb->flags |= EXTENT_BUFFER_FILLED;
-
-lru_add:
- spin_lock(&tree->lru_lock);
- add_lru(tree, eb);
- spin_unlock(&tree->lru_lock);
- return eb;
-fail:
- spin_lock(&tree->lru_lock);
- list_del_init(&eb->lru);
- spin_unlock(&tree->lru_lock);
- if (!atomic_dec_and_test(&eb->refs))
- return NULL;
- for (index = 1; index < i; index++) {
- page_cache_release(extent_buffer_page(eb, index));
- }
- if (i > 0)
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
- return NULL;
-}
-EXPORT_SYMBOL(find_extent_buffer);
-
-void free_extent_buffer(struct extent_buffer *eb)
-{
- unsigned long i;
- unsigned long num_pages;
-
- if (!eb)
- return;
-
- if (!atomic_dec_and_test(&eb->refs))
- return;
-
- WARN_ON(!list_empty(&eb->lru));
- num_pages = num_extent_pages(eb->start, eb->len);
-
- for (i = 1; i < num_pages; i++) {
- page_cache_release(extent_buffer_page(eb, i));
- }
- page_cache_release(extent_buffer_page(eb, 0));
- __free_extent_buffer(eb);
-}
-EXPORT_SYMBOL(free_extent_buffer);
-
-int clear_extent_buffer_dirty(struct extent_map_tree *tree,
- struct extent_buffer *eb)
-{
- int set;
- unsigned long i;
- unsigned long num_pages;
- struct page *page;
-
- u64 start = eb->start;
- u64 end = start + eb->len - 1;
-
- set = clear_extent_dirty(tree, start, end, GFP_NOFS);
- num_pages = num_extent_pages(eb->start, eb->len);
-
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- lock_page(page);
- if (i == 0)
- set_page_extent_head(page, eb->len);
- else
- set_page_private(page, EXTENT_PAGE_PRIVATE);
-
- /*
- * if we're on the last page or the first page and the
- * block isn't aligned on a page boundary, do extra checks
- * to make sure we don't clean page that is partially dirty
- */
- if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
- ((i == num_pages - 1) &&
- ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
- start = (u64)page->index << PAGE_CACHE_SHIFT;
- end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end,
- EXTENT_DIRTY, 0)) {
- unlock_page(page);
- continue;
- }
- }
- clear_page_dirty_for_io(page);
- write_lock_irq(&page->mapping->tree_lock);
- if (!PageDirty(page)) {
- radix_tree_tag_clear(&page->mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
- write_unlock_irq(&page->mapping->tree_lock);
- unlock_page(page);
- }
- return 0;
-}
-EXPORT_SYMBOL(clear_extent_buffer_dirty);
-
-int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
- struct extent_buffer *eb)
-{
- return wait_on_extent_writeback(tree, eb->start,
- eb->start + eb->len - 1);
-}
-EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
-
-int set_extent_buffer_dirty(struct extent_map_tree *tree,
- struct extent_buffer *eb)
-{
- unsigned long i;
- unsigned long num_pages;
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *page = extent_buffer_page(eb, i);
- /* writepage may need to do something special for the
- * first page, we have to make sure page->private is
- * properly set. releasepage may drop page->private
- * on us if the page isn't already dirty.
- */
- if (i == 0) {
- lock_page(page);
- set_page_extent_head(page, eb->len);
- } else if (PagePrivate(page) &&
- page->private != EXTENT_PAGE_PRIVATE) {
- lock_page(page);
- set_page_extent_mapped(page);
- unlock_page(page);
- }
- __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
- if (i == 0)
- unlock_page(page);
- }
- return set_extent_dirty(tree, eb->start,
- eb->start + eb->len - 1, GFP_NOFS);
-}
-EXPORT_SYMBOL(set_extent_buffer_dirty);
-
-int set_extent_buffer_uptodate(struct extent_map_tree *tree,
- struct extent_buffer *eb)
-{
- unsigned long i;
- struct page *page;
- unsigned long num_pages;
-
- num_pages = num_extent_pages(eb->start, eb->len);
-
- set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- GFP_NOFS);
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
- ((i == num_pages - 1) &&
- ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
- check_page_uptodate(tree, page);
- continue;
- }
- SetPageUptodate(page);
- }
- return 0;
-}
-EXPORT_SYMBOL(set_extent_buffer_uptodate);
-
-int extent_buffer_uptodate(struct extent_map_tree *tree,
- struct extent_buffer *eb)
-{
- if (eb->flags & EXTENT_UPTODATE)
- return 1;
- return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1);
-}
-EXPORT_SYMBOL(extent_buffer_uptodate);
-
-int read_extent_buffer_pages(struct extent_map_tree *tree,
- struct extent_buffer *eb,
- u64 start,
- int wait)
-{
- unsigned long i;
- unsigned long start_i;
- struct page *page;
- int err;
- int ret = 0;
- unsigned long num_pages;
-
- if (eb->flags & EXTENT_UPTODATE)
- return 0;
-
- if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
- EXTENT_UPTODATE, 1)) {
- return 0;
- }
-
- if (start) {
- WARN_ON(start < eb->start);
- start_i = (start >> PAGE_CACHE_SHIFT) -
- (eb->start >> PAGE_CACHE_SHIFT);
- } else {
- start_i = 0;
- }
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (PageUptodate(page)) {
- continue;
- }
- if (!wait) {
- if (TestSetPageLocked(page)) {
- continue;
- }
- } else {
- lock_page(page);
- }
- if (!PageUptodate(page)) {
- err = page->mapping->a_ops->readpage(NULL, page);
- if (err) {
- ret = err;
- }
- } else {
- unlock_page(page);
- }
- }
-
- if (ret || !wait) {
- return ret;
- }
-
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- wait_on_page_locked(page);
- if (!PageUptodate(page)) {
- ret = -EIO;
- }
- }
- if (!ret)
- eb->flags |= EXTENT_UPTODATE;
- return ret;
-}
-EXPORT_SYMBOL(read_extent_buffer_pages);
-
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
- unsigned long start,
- unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *dst = (char *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- unsigned long num_pages = num_extent_pages(eb->start, eb->len);
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while(len > 0) {
- page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
- WARN_ON(1);
- }
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = kmap_atomic(page, KM_USER1);
- memcpy(dst, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER1);
-
- dst += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-EXPORT_SYMBOL(read_extent_buffer);
-
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km)
-{
- size_t offset = start & (PAGE_CACHE_SIZE - 1);
- char *kaddr;
- struct page *p;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- unsigned long end_i = (start_offset + start + min_len - 1) >>
- PAGE_CACHE_SHIFT;
-
- if (i != end_i)
- return -EINVAL;
-
- if (i == 0) {
- offset = start_offset;
- *map_start = 0;
- } else {
- offset = 0;
- *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
- }
- if (start + min_len > eb->len) {
-printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
- WARN_ON(1);
- }
-
- p = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(p));
- kaddr = kmap_atomic(p, km);
- *token = kaddr;
- *map = kaddr + offset;
- *map_len = PAGE_CACHE_SIZE - offset;
- return 0;
-}
-EXPORT_SYMBOL(map_private_extent_buffer);
-
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len,
- char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km)
-{
- int err;
- int save = 0;
- if (eb->map_token) {
- unmap_extent_buffer(eb, eb->map_token, km);
- eb->map_token = NULL;
- save = 1;
- }
- err = map_private_extent_buffer(eb, start, min_len, token, map,
- map_start, map_len, km);
- if (!err && save) {
- eb->map_token = *token;
- eb->kaddr = *map;
- eb->map_start = *map_start;
- eb->map_len = *map_len;
- }
- return err;
-}
-EXPORT_SYMBOL(map_extent_buffer);
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
- kunmap_atomic(token, km);
-}
-EXPORT_SYMBOL(unmap_extent_buffer);
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *ptr = (char *)ptrv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- int ret = 0;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while(len > 0) {
- page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, (PAGE_CACHE_SIZE - offset));
-
- kaddr = kmap_atomic(page, KM_USER0);
- ret = memcmp(ptr, kaddr + offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
- if (ret)
- break;
-
- ptr += cur;
- len -= cur;
- offset = 0;
- i++;
- }
+ rb_erase(&em->rb_node, &tree->map);
+ em->in_tree = 0;
+ if (tree->last == em)
+ tree->last = NULL;
return ret;
}
-EXPORT_SYMBOL(memcmp_extent_buffer);
-
-void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
- unsigned long start, unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *src = (char *)srcv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while(len > 0) {
- page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER1);
- memcpy(kaddr + offset, src, cur);
- kunmap_atomic(kaddr, KM_USER1);
-
- src += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-EXPORT_SYMBOL(write_extent_buffer);
-
-void memset_extent_buffer(struct extent_buffer *eb, char c,
- unsigned long start, unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while(len > 0) {
- page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, c, cur);
- kunmap_atomic(kaddr, KM_USER0);
-
- len -= cur;
- offset = 0;
- i++;
- }
-}
-EXPORT_SYMBOL(memset_extent_buffer);
-
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
- unsigned long dst_offset, unsigned long src_offset,
- unsigned long len)
-{
- u64 dst_len = dst->len;
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(src->len != dst_len);
-
- offset = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while(len > 0) {
- page = extent_buffer_page(dst, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
-
- kaddr = kmap_atomic(page, KM_USER0);
- read_extent_buffer(src, kaddr + offset, src_offset, cur);
- kunmap_atomic(kaddr, KM_USER0);
-
- src_offset += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-EXPORT_SYMBOL(copy_extent_buffer);
-
-static void move_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
- if (dst_page == src_page) {
- memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
- } else {
- char *src_kaddr = kmap_atomic(src_page, KM_USER1);
- char *p = dst_kaddr + dst_off + len;
- char *s = src_kaddr + src_off + len;
-
- while (len--)
- *--p = *--s;
-
- kunmap_atomic(src_kaddr, KM_USER1);
- }
- kunmap_atomic(dst_kaddr, KM_USER0);
-}
-
-static void copy_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
- char *src_kaddr;
-
- if (dst_page != src_page)
- src_kaddr = kmap_atomic(src_page, KM_USER1);
- else
- src_kaddr = dst_kaddr;
-
- memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
- kunmap_atomic(dst_kaddr, KM_USER0);
- if (dst_page != src_page)
- kunmap_atomic(src_kaddr, KM_USER1);
-}
-
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
-{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long dst_i;
- unsigned long src_i;
-
- if (src_offset + len > dst->len) {
- printk("memmove bogus src_offset %lu move len %lu len %lu\n",
- src_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset + len > dst->len) {
- printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
- dst_offset, len, dst->len);
- BUG_ON(1);
- }
-
- while(len > 0) {
- dst_off_in_page = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
- src_off_in_page = (start_offset + src_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
-
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
- src_off_in_page));
- cur = min_t(unsigned long, cur,
- (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
-
- copy_pages(extent_buffer_page(dst, dst_i),
- extent_buffer_page(dst, src_i),
- dst_off_in_page, src_off_in_page, cur);
-
- src_offset += cur;
- dst_offset += cur;
- len -= cur;
- }
-}
-EXPORT_SYMBOL(memcpy_extent_buffer);
-
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
-{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- unsigned long dst_end = dst_offset + len - 1;
- unsigned long src_end = src_offset + len - 1;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long dst_i;
- unsigned long src_i;
-
- if (src_offset + len > dst->len) {
- printk("memmove bogus src_offset %lu move len %lu len %lu\n",
- src_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset + len > dst->len) {
- printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
- dst_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset < src_offset) {
- memcpy_extent_buffer(dst, dst_offset, src_offset, len);
- return;
- }
- while(len > 0) {
- dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
-
- dst_off_in_page = (start_offset + dst_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
- src_off_in_page = (start_offset + src_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- cur = min_t(unsigned long, len, src_off_in_page + 1);
- cur = min(cur, dst_off_in_page + 1);
- move_pages(extent_buffer_page(dst, dst_i),
- extent_buffer_page(dst, src_i),
- dst_off_in_page - cur + 1,
- src_off_in_page - cur + 1, cur);
-
- dst_end -= cur;
- src_end -= cur;
- len -= cur;
- }
-}
-EXPORT_SYMBOL(memmove_extent_buffer);
+EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ea60f5447b5b..56314217cfc0 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,215 +3,53 @@
#include <linux/rbtree.h>
+#define EXTENT_MAP_LAST_BYTE (u64)-4
#define EXTENT_MAP_HOLE (u64)-3
#define EXTENT_MAP_INLINE (u64)-2
#define EXTENT_MAP_DELALLOC (u64)-1
-/* bits for the extent state */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-#define EXTENT_DEFRAG (1 << 6)
-#define EXTENT_DEFRAG_DONE (1 << 7)
-#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_CSUM (1 << 9)
-#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
-
-/*
- * page->private values. Every page that is controlled by the extent
- * map has page->private set to one.
- */
-#define EXTENT_PAGE_PRIVATE 1
-#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
-
-
-struct extent_map_ops {
- int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
- int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
- int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
- int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end);
- void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end);
-};
-
-struct extent_map_tree {
- struct rb_root map;
- struct rb_root state;
- struct address_space *mapping;
- u64 dirty_bytes;
- rwlock_t lock;
- struct extent_map_ops *ops;
- spinlock_t lru_lock;
- struct list_head buffer_lru;
- int lru_size;
-};
-
-/* note, this must start with the same fields as fs/extent_map.c:tree_entry */
struct extent_map {
- u64 start;
- u64 end; /* inclusive */
- int in_tree;
struct rb_node rb_node;
- /* block_start and block_end are in bytes */
+
+ /* all of these are in bytes */
+ u64 start;
+ u64 len;
u64 block_start;
- u64 block_end; /* inclusive */
+ unsigned long flags;
struct block_device *bdev;
atomic_t refs;
-};
-
-/* note, this must start with the same fields as fs/extent_map.c:tree_entry */
-struct extent_state {
- u64 start;
- u64 end; /* inclusive */
int in_tree;
- struct rb_node rb_node;
- wait_queue_head_t wq;
- atomic_t refs;
- unsigned long state;
-
- /* for use by the FS */
- u64 private;
-
- struct list_head list;
};
-struct extent_buffer {
- u64 start;
- unsigned long len;
- char *map_token;
- char *kaddr;
- unsigned long map_start;
- unsigned long map_len;
- struct page *first_page;
- struct list_head lru;
- atomic_t refs;
- int flags;
+struct extent_map_tree { /* per-tree index of struct extent_map entries */
+ struct rb_root map; /* rbtree root; entries hang off it through extent_map.rb_node */
+ struct extent_map *last; /* NOTE(review): looks like a cached recently-used entry -- confirm in extent_map.c */
+ spinlock_t lock; /* held by callers around lookup/add/remove_extent_mapping */
};
-typedef struct extent_map *(get_extent_t)(struct inode *inode,
- struct page *page,
- size_t page_offset,
- u64 start, u64 end,
- int create);
+static inline u64 extent_map_end(struct extent_map *em) /* exclusive end of the mapped file range: start + len */
+{
+ if (em->start + em->len < em->start) /* the u64 sum wrapped around */
+ return (u64)-1; /* saturate instead of returning a wrapped, too-small end */
+ return em->start + em->len;
+}
+
+static inline u64 extent_map_block_end(struct extent_map *em) /* exclusive end of the disk range: block_start + len */
+{
+ if (em->block_start + em->len < em->block_start) /* can wrap, e.g. when block_start is an EXTENT_MAP_* marker near (u64)-1 */
+ return (u64)-1; /* saturate on overflow */
+ return em->block_start + em->len;
+}
-void extent_map_tree_init(struct extent_map_tree *tree,
- struct address_space *mapping, gfp_t mask);
-void extent_map_tree_empty_lru(struct extent_map_tree *tree);
+void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 end);
+ u64 start, u64 len);
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
-int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page);
-int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
-int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask);
+
struct extent_map *alloc_extent_map(gfp_t mask);
void free_extent_map(struct extent_map *em);
-int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
- get_extent_t *get_extent);
int __init extent_map_init(void);
void extent_map_exit(void);
-
-u64 count_range_bits(struct extent_map_tree *tree,
- u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
-
-int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
- int bits, int filled);
-int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
-int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
-int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits);
-int extent_invalidatepage(struct extent_map_tree *tree,
- struct page *page, unsigned long offset);
-int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
- get_extent_t *get_extent,
- struct writeback_control *wbc);
-int extent_writepages(struct extent_map_tree *tree,
- struct address_space *mapping,
- get_extent_t *get_extent,
- struct writeback_control *wbc);
-int extent_readpages(struct extent_map_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent);
-int extent_prepare_write(struct extent_map_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to, get_extent_t *get_extent);
-int extent_commit_write(struct extent_map_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to);
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
- get_extent_t *get_extent);
-int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end);
-int set_state_private(struct extent_map_tree *tree, u64 start, u64 private);
-int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private);
-void set_page_extent_mapped(struct page *page);
-
-struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
- u64 start, unsigned long len,
- struct page *page0,
- gfp_t mask);
-struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
- u64 start, unsigned long len,
- gfp_t mask);
-void free_extent_buffer(struct extent_buffer *eb);
-int read_extent_buffer_pages(struct extent_map_tree *tree,
- struct extent_buffer *eb, u64 start, int wait);
-
-static inline void extent_buffer_get(struct extent_buffer *eb)
-{
- atomic_inc(&eb->refs);
-}
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
- unsigned long start,
- unsigned long len);
-void write_extent_buffer(struct extent_buffer *eb, const void *src,
- unsigned long start, unsigned long len);
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
- unsigned long dst_offset, unsigned long src_offset,
- unsigned long len);
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memset_extent_buffer(struct extent_buffer *eb, char c,
- unsigned long start, unsigned long len);
-int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
- struct extent_buffer *eb);
-int clear_extent_buffer_dirty(struct extent_map_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_map_tree *tree,
- struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_map_tree *tree,
- struct extent_buffer *eb);
-int extent_buffer_uptodate(struct extent_map_tree *tree,
- struct extent_buffer *eb);
-int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **token, char **map,
- unsigned long *map_start,
- unsigned long *map_len, int km);
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km);
#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1cd8c908811e..c5bb00f92396 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
int err = 0;
int i;
struct inode *inode = fdentry(file)->d_inode;
- struct extent_map *em;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 hint_byte;
u64 num_bytes;
u64 start_pos;
@@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
u64 end_pos = pos + write_bytes;
u64 inline_size;
loff_t isize = i_size_read(inode);
- em = alloc_extent_map(GFP_NOFS);
- if (!em)
- return -ENOMEM;
-
- em->bdev = inode->i_sb->s_bdev;
start_pos = pos & ~((u64)root->sectorsize - 1);
num_bytes = (write_bytes + pos - start_pos +
@@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
end_of_last_block = start_pos + num_bytes - 1;
- lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
+ lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
if (!trans) {
@@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
if ((end_of_last_block & 4095) == 0) {
printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block);
}
- set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS);
+ set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
/* FIXME...EIEIO, ENOSPC and more */
@@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
inode->i_ino,
last_pos_in_file,
0, 0, hole_size);
+ btrfs_drop_extent_cache(inode, last_pos_in_file,
+ last_pos_in_file + hole_size -1);
btrfs_check_file(root, inode);
}
if (err)
@@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
last_end += PAGE_CACHE_SIZE - 1;
if (start_pos < isize) {
u64 delalloc_start = start_pos;
- existing_delalloc = count_range_bits(em_tree,
+ existing_delalloc = count_range_bits(io_tree,
&delalloc_start,
end_of_last_block, (u64)-1,
EXTENT_DELALLOC);
}
- set_extent_delalloc(em_tree, start_pos, end_of_last_block,
+ set_extent_delalloc(io_tree, start_pos, end_of_last_block,
GFP_NOFS);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
@@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
inline_size -= start_pos;
err = insert_inline_extent(trans, root, inode, start_pos,
inline_size, pages, 0, num_pages);
+ btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1);
BUG_ON(err);
}
if (end_pos > isize) {
@@ -356,8 +353,7 @@ failed:
err = btrfs_end_transaction(trans, root);
out_unlock:
mutex_unlock(&root->fs_info->fs_mutex);
- unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS);
- free_extent_map(em);
+ unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
return err;
}
@@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
while(1) {
+ spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, end);
- if (!em)
+ if (!em) {
+ spin_unlock(&em_tree->lock);
break;
+ }
remove_extent_mapping(em_tree, em);
+ spin_unlock(&em_tree->lock);
+
/* once for us */
free_extent_map(em);
/* once for the tree*/
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 67005480e139..16d3aef45d18 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
-static struct extent_map_ops btrfs_extent_map_ops;
+static struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
@@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
u64 num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
+ u64 orig_start = start;
+ u64 orig_num_bytes;
struct btrfs_key ins;
int ret;
@@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
num_bytes = max(blocksize, num_bytes);
ret = btrfs_drop_extents(trans, root, inode,
start, start + num_bytes, start, &alloc_hint);
+ orig_num_bytes = num_bytes;
if (alloc_hint == EXTENT_MAP_INLINE)
goto out;
@@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
+ btrfs_drop_extent_cache(inode, orig_start,
+ orig_start + orig_num_bytes - 1);
btrfs_add_ordered_inode(inode);
out:
btrfs_end_transaction(trans, root);
@@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
int ret = 0;
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_csum_item *item;
struct btrfs_path *path = NULL;
u32 csum;
@@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
}
read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
BTRFS_CRC32_SIZE);
- set_state_private(em_tree, start, csum);
+ set_state_private(io_tree, start, csum);
out:
if (path)
btrfs_free_path(path);
@@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
{
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
char *kaddr;
u64 private;
int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
u32 csum = ~(u32)0;
unsigned long flags;
+
if (btrfs_test_opt(root, NODATASUM) ||
btrfs_test_flag(inode, NODATASUM))
return 0;
- ret = get_state_private(em_tree, start, &private);
+
+ ret = get_state_private(io_tree, start, &private);
local_irq_save(flags);
kaddr = kmap_atomic(page, KM_IRQ0);
if (ret) {
@@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode)
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_mapping->a_ops = &btrfs_aops;
- BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
break;
@@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
size_t zero_start)
{
char *kaddr;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
@@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
WARN_ON(!PageLocked(page));
set_page_extent_mapped(page);
- lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ lock_extent(io_tree, page_start, page_end, GFP_NOFS);
delalloc_start = page_start;
- existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
+ existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree,
&delalloc_start, page_end,
PAGE_CACHE_SIZE, EXTENT_DELALLOC);
- set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
+ set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
page_end, GFP_NOFS);
spin_lock(&root->fs_info->delalloc_lock);
@@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page,
kunmap(page);
}
set_page_dirty(page);
- unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
return ret;
}
@@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 mask = root->sectorsize - 1;
u64 pos = (inode->i_size + mask) & ~mask;
@@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
btrfs_truncate_page(inode->i_mapping, inode->i_size);
- lock_extent(em_tree, pos, block_end, GFP_NOFS);
+ lock_extent(io_tree, pos, block_end, GFP_NOFS);
hole_size = block_end - hole_start;
mutex_lock(&root->fs_info->fs_mutex);
@@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_ino,
hole_start, 0, 0,
hole_size);
+ btrfs_drop_extent_cache(inode, hole_start, /* arguments are (start, end); end is an inclusive file offset */
+ hole_start + hole_size - 1); /* last byte of the new hole; hole_size - 1 alone is a length, not an offset */
btrfs_check_file(root, inode);
}
btrfs_end_transaction(trans, root);
mutex_unlock(&root->fs_info->fs_mutex);
- unlock_extent(em_tree, pos, block_end, GFP_NOFS);
+ unlock_extent(io_tree, pos, block_end, GFP_NOFS);
if (err)
return err;
}
@@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->root = args->root;
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+ extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
return 0;
}
@@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (!inode)
return ERR_PTR(-ENOMEM);
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+ extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
BTRFS_I(inode)->root = root;
@@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+ extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
- BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
@@ -1816,7 +1828,7 @@ out_unlock:
}
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
- size_t page_offset, u64 start, u64 end,
+ size_t page_offset, u64 start, u64 len,
int create)
{
int ret;
@@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
u64 extent_end = 0;
u64 objectid = inode->i_ino;
u32 found_type;
- int failed_insert = 0;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_extent_item *item;
@@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_trans_handle *trans = NULL;
path = btrfs_alloc_path();
@@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
mutex_lock(&root->fs_info->fs_mutex);
again:
- em = lookup_extent_mapping(em_tree, start, end);
+ spin_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, start, len);
+ spin_unlock(&em_tree->lock);
+
if (em) {
if (em->start > start) {
- printk("get_extent start %Lu em start %Lu\n",
- start, em->start);
+ printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
+ start, len, em->start, em->len);
WARN_ON(1);
}
goto out;
}
+ em = alloc_extent_map(GFP_NOFS);
if (!em) {
- em = alloc_extent_map(GFP_NOFS);
- if (!em) {
- err = -ENOMEM;
- goto out;
- }
- em->start = EXTENT_MAP_HOLE;
- em->end = EXTENT_MAP_HOLE;
+ err = -ENOMEM;
+ goto out;
}
+
+ em->start = EXTENT_MAP_HOLE;
+ em->len = (u64)-1;
em->bdev = inode->i_sb->s_bdev;
ret = btrfs_lookup_file_extent(trans, root, path,
objectid, start, trans != NULL);
@@ -1893,28 +1907,25 @@ again:
if (start < extent_start || start >= extent_end) {
em->start = start;
if (start < extent_start) {
- if (end < extent_start)
+ if (start + len <= extent_start)
goto not_found;
- em->end = extent_end - 1;
+ em->len = extent_end - extent_start;
} else {
- em->end = end;
+ em->len = len;
}
goto not_found_em;
}
bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
if (bytenr == 0) {
em->start = extent_start;
- em->end = extent_end - 1;
+ em->len = extent_end - extent_start;
em->block_start = EXTENT_MAP_HOLE;
- em->block_end = EXTENT_MAP_HOLE;
goto insert;
}
bytenr += btrfs_file_extent_offset(leaf, item);
em->block_start = bytenr;
- em->block_end = em->block_start +
- btrfs_file_extent_num_bytes(leaf, item) - 1;
em->start = extent_start;
- em->end = extent_end - 1;
+ em->len = extent_end - extent_start;
goto insert;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
unsigned long ptr;
@@ -1925,25 +1936,24 @@ again:
size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
path->slots[0]));
- extent_end = (extent_start + size - 1) |
- ((u64)root->sectorsize - 1);
+ extent_end = (extent_start + size + root->sectorsize - 1) &
+ ~((u64)root->sectorsize - 1);
if (start < extent_start || start >= extent_end) {
em->start = start;
if (start < extent_start) {
- if (end < extent_start)
+ if (start + len <= extent_start)
goto not_found;
- em->end = extent_end;
+ em->len = extent_end - extent_start;
} else {
- em->end = end;
+ em->len = len;
}
goto not_found_em;
}
em->block_start = EXTENT_MAP_INLINE;
- em->block_end = EXTENT_MAP_INLINE;
if (!page) {
em->start = extent_start;
- em->end = extent_start + size - 1;
+ em->len = size;
goto out;
}
@@ -1952,8 +1962,7 @@ again:
copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
size - extent_offset);
em->start = extent_start + extent_offset;
- em->end = (em->start + copy_size -1) |
- ((u64)root->sectorsize -1);
+ em->len = copy_size;
map = kmap(page);
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) {
@@ -1974,7 +1983,8 @@ again:
btrfs_mark_buffer_dirty(leaf);
}
kunmap(page);
- set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
+ set_extent_uptodate(io_tree, em->start,
+ extent_map_end(em) - 1, GFP_NOFS);
goto insert;
} else {
printk("unkknown found_type %d\n", found_type);
@@ -1982,33 +1992,29 @@ again:
}
not_found:
em->start = start;
- em->end = end;
+ em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
- em->block_end = EXTENT_MAP_HOLE;
insert:
btrfs_release_path(root, path);
- if (em->start > start || em->end < start) {
- printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
+ if (em->start > start || extent_map_end(em) <= start) {
+ printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
err = -EIO;
goto out;
}
+
+ err = 0;
+ spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
free_extent_map(em);
- em = NULL;
- if (0 && failed_insert == 1) {
- btrfs_drop_extent_cache(inode, start, end);
- }
- failed_insert++;
- if (failed_insert > 5) {
- printk("failing to insert %Lu %Lu\n", start, end);
+ em = lookup_extent_mapping(em_tree, start, len);
+ if (!em) {
err = -EIO;
- goto out;
+ printk("failing to insert %Lu %Lu\n", start, len);
}
- goto again;
}
- err = 0;
+ spin_unlock(&em_tree->lock);
out:
btrfs_free_path(path);
if (trans) {
@@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
int btrfs_readpage(struct file *file, struct page *page)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
return extent_read_full_page(tree, page, btrfs_get_extent);
}
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
{
- struct extent_map_tree *tree;
+ struct extent_io_tree *tree;
if (current->flags & PF_MEMALLOC) {
@@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
unlock_page(page);
return 0;
}
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
}
static int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(mapping->host)->io_tree;
return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
}
@@ -2063,19 +2069,21 @@ static int
btrfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct extent_map_tree *tree;
- tree = &BTRFS_I(mapping->host)->extent_tree;
+ struct extent_io_tree *tree;
+ tree = &BTRFS_I(mapping->host)->io_tree;
return extent_readpages(tree, mapping, pages, nr_pages,
btrfs_get_extent);
}
static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
- struct extent_map_tree *tree;
+ struct extent_io_tree *tree;
+ struct extent_map_tree *map;
int ret;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
- ret = try_release_extent_mapping(tree, page);
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+ map = &BTRFS_I(page->mapping->host)->extent_tree;
+ ret = try_release_extent_mapping(map, tree, page);
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
@@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
- struct extent_map_tree *tree;
+ struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->extent_tree;
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
extent_invalidatepage(tree, page, offset);
btrfs_releasepage(page, GFP_NOFS);
}
@@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
int btrfs_defrag_file(struct file *file) {
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct page *page;
unsigned long last_index;
unsigned long ra_index = 0;
@@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) {
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_end = page_start + PAGE_CACHE_SIZE - 1;
- lock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ lock_extent(io_tree, page_start, page_end, GFP_NOFS);
delalloc_start = page_start;
existing_delalloc =
- count_range_bits(&BTRFS_I(inode)->extent_tree,
+ count_range_bits(&BTRFS_I(inode)->io_tree,
&delalloc_start, page_end,
PAGE_CACHE_SIZE, EXTENT_DELALLOC);
- set_extent_delalloc(em_tree, page_start,
+ set_extent_delalloc(io_tree, page_start,
page_end, GFP_NOFS);
spin_lock(&root->fs_info->delalloc_lock);
@@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) {
existing_delalloc;
spin_unlock(&root->fs_info->delalloc_lock);
- unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_dirty(page);
unlock_page(page);
page_cache_release(page);
@@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+ extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
+ extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
- BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
@@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = {
#endif
};
-static struct extent_map_ops btrfs_extent_map_ops = {
+static struct extent_io_ops btrfs_extent_io_ops = {
.fill_delalloc = run_delalloc_range,
.writepage_io_hook = btrfs_writepage_io_hook,
.readpage_io_hook = btrfs_readpage_io_hook,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8b52c69fda2e..f8a1016600b1 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void)
err = btrfs_init_cachep();
if (err)
goto free_transaction_sys;
- err = extent_map_init();
+
+ err = extent_io_init();
if (err)
goto free_cachep;
+ err = extent_map_init();
+ if (err)
+ goto free_extent_io;
+
err = register_filesystem(&btrfs_fs_type);
if (err)
goto free_extent_map;
@@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void)
free_extent_map:
extent_map_exit();
+free_extent_io:
+ extent_io_exit();
free_cachep:
btrfs_destroy_cachep();
free_transaction_sys:
@@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void)
btrfs_exit_transaction_sys();
btrfs_destroy_cachep();
extent_map_exit();
+ extent_io_exit();
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 163c01a24498..b4a1bc62a784 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root)
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
- extent_map_tree_init(&cur_trans->dirty_pages,
+ extent_io_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping,
GFP_NOFS);
} else {
@@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
int ret;
int err;
int werr = 0;
- struct extent_map_tree *dirty_pages;
+ struct extent_io_tree *dirty_pages;
struct page *page;
struct inode *btree_inode = root->fs_info->btree_inode;
u64 start;
@@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans;
struct btrfs_transaction *prev_trans = NULL;
struct list_head dirty_fs_roots;
- struct extent_map_tree *pinned_copy;
+ struct extent_io_tree *pinned_copy;
DEFINE_WAIT(wait);
int ret;
@@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (!pinned_copy)
return -ENOMEM;
- extent_map_tree_init(pinned_copy,
+ extent_io_tree_init(pinned_copy,
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
trans->transaction->in_commit = 1;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index fd52e9b23922..c3172ddb3321 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -29,7 +29,7 @@ struct btrfs_transaction {
int use_count;
int commit_done;
struct list_head list;
- struct extent_map_tree dirty_pages;
+ struct extent_io_tree dirty_pages;
unsigned long start_time;
struct btrfs_ordered_inode_tree ordered_inode_tree;
wait_queue_head_t writer_wait;